1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
11 New API code Copyright (c) 2016-2019 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #include "pcre2_internal.h"
47
48 #ifdef SUPPORT_JIT
49
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53
54 #define SLJIT_CONFIG_AUTO 1
55 #define SLJIT_CONFIG_STATIC 1
56 #define SLJIT_VERBOSE 0
57
58 #ifdef PCRE2_DEBUG
59 #define SLJIT_DEBUG 1
60 #else
61 #define SLJIT_DEBUG 0
62 #endif
63
64 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
65 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78
79 #include "sljit/sljitLir.c"
80
81 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82 #error Unsupported architecture
83 #endif
84
85 /* Defines for debugging purposes. */
86
87 /* 1 - Use unoptimized capturing brackets.
88 2 - Enable capture_last_ptr (includes option 1). */
89 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90
91 /* 1 - Always have a control head. */
92 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93
94 /* Allocate memory for the regex stack on the real machine stack.
95 Fast, but limited size. */
96 #define MACHINE_STACK_SIZE 32768
97
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
100 #define STACK_GROWTH_RATE 8192
101
102 /* Enable to check that the allocation could destroy temporaries. */
103 #if defined SLJIT_DEBUG && SLJIT_DEBUG
104 #define DESTROY_REGISTERS 1
105 #endif
106
107 /*
Short summary about the backtracking mechanism employed by the jit code generator:
109
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115
116 'ab' - 'a' and 'b' regexps are concatenated
117 'a+' - 'a' is the sub-expression of the '+' operator
118
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124
125 Greedy star operator (*) :
126 Matching path: match happens.
127 Backtrack path: match failed.
128 Non-greedy star operator (*?) :
129 Matching path: no need to perform a match.
130 Backtrack path: match is required.
131
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
assume we have the following regular expression:
135
136 A(B|C)D
137
138 The generated code will be the following:
139
140 A matching path
141 '(' matching path (pushing arguments to the stack)
142 B matching path
143 ')' matching path (pushing arguments to the stack)
144 D matching path
145 return with successful match
146
147 D backtrack path
148 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149 B backtrack path
150 C expected path
151 jump to D matching path
152 C backtrack path
153 A backtrack path
154
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
161 */
162
163 /*
164 Saved stack frames:
165
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174 Thus we can restore the private data to a particular point in the stack.
175 */
176
/* Argument block for the generated code: a pointer to this structure is the
only parameter of the jit_function type defined in this file. Pointer-sized
members are grouped first, the remaining members follow them. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and the opaque value stored alongside it
  (presumably passed as the function's second argument - confirm at the
  call site). */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
194
195 #define JIT_NUMBER_OF_COMPILE_MODES 3
196
/* Per-pattern table of JIT results. Each of the three arrays has one slot
per compile mode (JIT_NUMBER_OF_COMPILE_MODES entries). */
typedef struct executable_functions {
  /* NOTE(review): by their names these hold, for each mode, the generated
  function, the head of its read-only data chain and the size of the
  generated code - confirm where the slots are filled in. */
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  /* Values shared by all compile modes. */
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
204
205 typedef struct jump_list {
206 struct sljit_jump *jump;
207 struct jump_list *next;
208 } jump_list;
209
210 typedef struct stub_list {
211 struct sljit_jump *start;
212 struct sljit_label *quit;
213 struct stub_list *next;
214 } stub_list;
215
216 enum frame_types {
217 no_frame = -1,
218 no_stack = -2
219 };
220
221 enum control_types {
222 type_mark = 0,
223 type_then_trap = 1
224 };
225
226 enum early_fail_types {
227 type_skip = 0,
228 type_fail = 1,
229 type_fail_range = 2
230 };
231
232 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
234 /* The following structure is the key data type for the recursive
235 code generator. It is allocated by compile_matchingpath, and contains
236 the arguments for compile_backtrackingpath. Must be the first member
237 of its descendants. */
238 typedef struct backtrack_common {
239 /* Concatenation stack. */
240 struct backtrack_common *prev;
241 jump_list *nextbacktracks;
242 /* Internal stack (for component operators). */
243 struct backtrack_common *top;
244 jump_list *topbacktracks;
245 /* Opcode pointer. */
246 PCRE2_SPTR cc;
247 } backtrack_common;
248
249 typedef struct assert_backtrack {
250 backtrack_common common;
251 jump_list *condfailed;
252 /* Less than 0 if a frame is not needed. */
253 int framesize;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 /* For iterators. */
257 struct sljit_label *matchingpath;
258 } assert_backtrack;
259
/* Backtrack data for bracket opcodes. The backtrack_common member comes
first, as backtrack_common requires of its descendants. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. The members share storage,
  so only the one that belongs to the current opcode may be used. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
281
282 typedef struct bracketpos_backtrack {
283 backtrack_common common;
284 /* Points to our private memory word on the stack. */
285 int private_data_ptr;
286 /* Reverting stack is needed. */
287 int framesize;
288 /* Allocated stack size. */
289 int stacksize;
290 } bracketpos_backtrack;
291
292 typedef struct braminzero_backtrack {
293 backtrack_common common;
294 struct sljit_label *matchingpath;
295 } braminzero_backtrack;
296
297 typedef struct char_iterator_backtrack {
298 backtrack_common common;
299 /* Next iteration. */
300 struct sljit_label *matchingpath;
301 union {
302 jump_list *backtracks;
303 struct {
304 unsigned int othercasebit;
305 PCRE2_UCHAR chr;
306 BOOL enabled;
307 } charpos;
308 } u;
309 } char_iterator_backtrack;
310
311 typedef struct ref_iterator_backtrack {
312 backtrack_common common;
313 /* Next iteration. */
314 struct sljit_label *matchingpath;
315 } ref_iterator_backtrack;
316
/* One node of a singly linked list (chained through next) that describes a
function generated for a recursion target. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the backtrack label of the function. */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
330
331 typedef struct recurse_backtrack {
332 backtrack_common common;
333 /* Return to the matching path. */
334 struct sljit_label *matchingpath;
335 /* Recursive pattern. */
336 recurse_entry *entry;
337 /* Pattern is inlined. */
338 BOOL inlined_pattern;
339 } recurse_backtrack;
340
341 #define OP_THEN_TRAP OP_TABLE_LENGTH
342
343 typedef struct then_trap_backtrack {
344 backtrack_common common;
345 /* If then_trap is not NULL, this structure contains the real
346 then_trap for the backtracking path. */
347 struct then_trap_backtrack *then_trap;
348 /* Points to the starting opcode. */
349 sljit_sw start;
350 /* Exit point for the then opcodes of this alternative. */
351 jump_list *quit;
352 /* Frame size of the current alternative. */
353 int framesize;
354 } then_trap_backtrack;
355
356 #define MAX_N_CHARS 12
357 #define MAX_DIFF_CHARS 5
358
359 typedef struct fast_forward_char_data {
360 /* Number of characters in the chars array, 255 for any character. */
361 sljit_u8 count;
362 /* Number of last UTF-8 characters in the chars array. */
363 sljit_u8 last_count;
364 /* Available characters in the current position. */
365 PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366 } fast_forward_char_data;
367
368 #define MAX_CLASS_RANGE_SIZE 4
369 #define MAX_CLASS_CHARS_SIZE 3
370
/* Shared state of one JIT compilation. The helpers in this file receive a
pointer to it (conventionally named "common"); macros such as OVECTOR_START,
OVECTOR_PRIV and PRIVATE_DATA expand to accesses through that pointer. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode (indexed by cc - start, see
  PRIVATE_DATA). */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
  (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0.
  NOTE(review): the field name suggests the opposite (TRUE when the match may
  be empty), and check_opcode_types also sets it to TRUE when OP_SET_SOM is
  seen - confirm against the code that initialises it. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
503
504 /* For byte_sequence_compare. */
505
506 typedef struct compare_context {
507 int length;
508 int sourcereg;
509 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
510 int ucharptr;
511 union {
512 sljit_s32 asint;
513 sljit_u16 asushort;
514 #if PCRE2_CODE_UNIT_WIDTH == 8
515 sljit_u8 asbyte;
516 sljit_u8 asuchars[4];
517 #elif PCRE2_CODE_UNIT_WIDTH == 16
518 sljit_u16 asuchars[2];
519 #elif PCRE2_CODE_UNIT_WIDTH == 32
520 sljit_u32 asuchars[1];
521 #endif
522 } c;
523 union {
524 sljit_s32 asint;
525 sljit_u16 asushort;
526 #if PCRE2_CODE_UNIT_WIDTH == 8
527 sljit_u8 asbyte;
528 sljit_u8 asuchars[4];
529 #elif PCRE2_CODE_UNIT_WIDTH == 16
530 sljit_u16 asuchars[2];
531 #elif PCRE2_CODE_UNIT_WIDTH == 32
532 sljit_u32 asuchars[1];
533 #endif
534 } oc;
535 #endif
536 } compare_context;
537
538 /* Undefine sljit macros. */
539 #undef CMP
540
541 /* Used for accessing the elements of the stack. */
542 #define STACK(i) ((i) * (int)sizeof(sljit_sw))
543
544 #ifdef SLJIT_PREF_SHIFT_REG
545 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
546 /* Nothing. */
547 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
548 #define SHIFT_REG_IS_R3
549 #else
550 #error "Unsupported shift register"
551 #endif
552 #endif
553
554 #define TMP1 SLJIT_R0
555 #ifdef SHIFT_REG_IS_R3
556 #define TMP2 SLJIT_R3
557 #define TMP3 SLJIT_R2
558 #else
559 #define TMP2 SLJIT_R2
560 #define TMP3 SLJIT_R3
561 #endif
562 #define STR_PTR SLJIT_R1
563 #define STR_END SLJIT_S0
564 #define STACK_TOP SLJIT_S1
565 #define STACK_LIMIT SLJIT_S2
566 #define COUNT_MATCH SLJIT_S3
567 #define ARGUMENTS SLJIT_S4
568 #define RETURN_ADDR SLJIT_R4
569
570 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
571 #define HAS_VIRTUAL_REGISTERS 1
572 #else
573 #define HAS_VIRTUAL_REGISTERS 0
574 #endif
575
576 /* Local space layout. */
577 /* These two locals can be used by the current opcode. */
578 #define LOCALS0 (0 * sizeof(sljit_sw))
579 #define LOCALS1 (1 * sizeof(sljit_sw))
580 /* Two local variables for possessive quantifiers (char1 cannot use them). */
581 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
582 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
583 /* Max limit of recursions. */
584 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
585 /* The output vector is stored on the stack, and contains pointers
586 to characters. The vector data is divided into two groups: the first
587 group contains the start / end character pointers, and the second is
588 the start pointers when the end of the capturing group has not yet reached. */
589 #define OVECTOR_START (common->ovector_start)
590 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
591 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
592 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
593
594 #if PCRE2_CODE_UNIT_WIDTH == 8
595 #define MOV_UCHAR SLJIT_MOV_U8
596 #define IN_UCHARS(x) (x)
597 #elif PCRE2_CODE_UNIT_WIDTH == 16
598 #define MOV_UCHAR SLJIT_MOV_U16
599 #define UCHAR_SHIFT (1)
600 #define IN_UCHARS(x) ((x) * 2)
601 #elif PCRE2_CODE_UNIT_WIDTH == 32
602 #define MOV_UCHAR SLJIT_MOV_U32
603 #define UCHAR_SHIFT (2)
604 #define IN_UCHARS(x) ((x) * 4)
605 #else
606 #error Unsupported compiling mode
607 #endif
608
609 /* Shortcuts. */
610 #define DEFINE_COMPILER \
611 struct sljit_compiler *compiler = common->compiler
612 #define OP1(op, dst, dstw, src, srcw) \
613 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
614 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
615 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
616 #define OP_SRC(op, src, srcw) \
617 sljit_emit_op_src(compiler, (op), (src), (srcw))
618 #define LABEL() \
619 sljit_emit_label(compiler)
620 #define JUMP(type) \
621 sljit_emit_jump(compiler, (type))
622 #define JUMPTO(type, label) \
623 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
624 #define JUMPHERE(jump) \
625 sljit_set_label((jump), sljit_emit_label(compiler))
626 #define SET_LABEL(jump, label) \
627 sljit_set_label((jump), (label))
628 #define CMP(type, src1, src1w, src2, src2w) \
629 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
630 #define CMPTO(type, src1, src1w, src2, src2w, label) \
631 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
632 #define OP_FLAGS(op, dst, dstw, type) \
633 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
634 #define CMOV(type, dst_reg, src, srcw) \
635 sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
636 #define GET_LOCAL_BASE(dst, dstw, offset) \
637 sljit_get_local_base(compiler, (dst), (dstw), (offset))
638
639 #define READ_CHAR_MAX 0x7fffffff
640
641 #define INVALID_UTF_CHAR -1
642 #define UNASSIGNED_UTF_CHAR 888
643
644 #if defined SUPPORT_UNICODE
645 #if PCRE2_CODE_UNIT_WIDTH == 8
646
/* Reads one UTF-8 character starting at ptr, validating it on the way
(8-bit code units).

  c               receives the decoded code point
  ptr             read position; advanced past the character on success
  end             end of the subject; continuation bytes are only read
                  while they lie before end
  invalid_action  statement(s) executed when the sequence is not valid

ptr[0] itself is read without a bounds check. Continuation bytes must be in
the range 0x80-0xbf and the lead byte must be 0xc2-0xdf, 0xe0-0xef or
0xf0-0xf4 for 2, 3 and 4 byte sequences. Three byte sequences that decode
below 0x800 or to a surrogate (0xd800-0xdfff), and four byte sequences that
decode below 0x10000 or to 0x110000 and above, are rejected as well.

When the byte structure is wrong, invalid_action runs with ptr unchanged;
when only the decoded value is rejected, ptr has already been advanced.
In both cases c may hold a partial or rejected value.

The macro expands to a brace block and evaluates ptr several times, so the
argument must be a plain lvalue without side effects. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
708
/* Reads the UTF-8 character that ends just before ptr, validating it on the
way (8-bit code units).

  c               receives the decoded code point
  ptr             read position; moved back to the first byte of the
                  character on success
  start           start of the subject; no byte before start is read,
                  except that ptr[-1] itself is read without a bounds check
                  (the caller must ensure ptr > start)
  invalid_action  statement(s) executed when the sequence is not valid

The bytes are visited from the last one towards the lead byte, so every
byte contributes bits of higher order than those already collected: the
last continuation byte supplies bits 0-5, the one before it bits 6-11, the
next one bits 12-17, and the lead byte the remaining top bits. Each byte is
therefore OR-ed in at its own shift position (shifting the accumulated
value instead would scramble the order of the 6-bit groups).

Continuation bytes must be in the range 0x80-0xbf and the lead byte must be
0xc2-0xdf, 0xe0-0xef or 0xf0-0xf4 for 2, 3 and 4 byte sequences. Three byte
sequences that decode below 0x800 or to a surrogate (0xd800-0xdfff), and
four byte sequences that decode below 0x10000 or to 0x110000 and above, are
rejected as well.

When the byte structure is wrong, invalid_action runs with ptr unchanged;
when only the decoded value is rejected, ptr has already been moved back.
In both cases c may hold a partial or rejected value.

The macro expands to a brace block and evaluates ptr several times, so the
argument must be a plain lvalue without side effects. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
771
772 #elif PCRE2_CODE_UNIT_WIDTH == 16
773
/* Reads one UTF-16 character starting at ptr, validating it on the way
(16-bit code units).

A code unit outside 0xd800-0xdfff is taken as it is and ptr advances by one.
A high surrogate (0xd800-0xdbff) that is followed, before end, by a low
surrogate (0xdc00-0xdfff) is combined with it into a code point of 0x10000
or above and ptr advances by two. In every other case invalid_action is
executed and neither c nor ptr is modified.

ptr[0] is read without a bounds check. The macro expands to a brace block
and evaluates ptr several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
788
/* Reads the UTF-16 character that ends just before ptr, validating it on
the way (16-bit code units).

c is always loaded with ptr[-1] first (without a bounds check). If that
unit is outside 0xd800-0xdfff, ptr moves back by one. If it is a low
surrogate (0xdc00-0xdfff) and the unit before it exists (ptr - 1 > start)
and is a high surrogate (0xd800-0xdbff), the pair is combined into a code
point of 0x10000 or above and ptr moves back by two. In every other case
invalid_action is executed with ptr unchanged and c still holding ptr[-1].

The macro expands to a brace block and evaluates ptr several times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
804
805
806 #elif PCRE2_CODE_UNIT_WIDTH == 32
807
/* Reads one UTF-32 character at ptr, validating it on the way (32-bit code
units).

The code unit is accepted when it is below 0xd800, or at least 0xe000 and
below 0x110000; c receives it and ptr advances by one. Otherwise
invalid_action is executed and neither c nor ptr is modified. The end
argument is not used by this variant; ptr[0] is read without a bounds
check. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
817
/* Reads the UTF-32 character that ends just before ptr, validating it on
the way (32-bit code units).

c is always loaded with ptr[-1] (without a bounds check). The unit is
accepted when it is below 0xd800, or at least 0xe000 and below 0x110000,
and ptr then moves back by one. Otherwise invalid_action is executed with
ptr unchanged and c still holding the rejected unit. The start argument is
not used by this variant. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
828
829 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
830 #endif /* SUPPORT_UNICODE */
831
/* Finds the end of a bracketed group.

Argument:  cc  points to the opening opcode of the group (an assertion, or
               an opcode in the OP_ONCE .. OP_SCOND range)
Returns:       pointer to the code that follows the closing KET opcode
*/
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  /* Step over the first branch, then over every alternative after it. */
  cc += GET(cc, 1);
  while (*cc == OP_ALT)
    cc += GET(cc, 1);

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);

  /* Skip the closing opcode together with its link field. */
  return cc + 1 + LINK_SIZE;
}
840
/* Counts the branches of a bracketed group.

Argument:  cc  points to the opening opcode of the group (an assertion, or
               an opcode in the OP_ONCE .. OP_SCOND range)
Returns:       the number of branches; always at least 1
*/
static int no_alternatives(PCRE2_SPTR cc)
{
  int branches;

  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  /* The first branch is always present; each OP_ALT starts one more. */
  cc += GET(cc, 1);
  for (branches = 1; *cc == OP_ALT; branches++)
    cc += GET(cc, 1);

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  return branches;
}
854
/* Functions which might need modification for all new supported opcodes:
856 next_opcode
857 check_opcode_types
858 set_private_data_ptrs
859 get_framesize
860 init_frame
861 get_recurse_data_length
862 copy_recurse_data
863 compile_matchingpath
864 compile_backtrackingpath
865 */
866
/* Returns a pointer to the opcode that follows the one at cc.

Arguments:
  common  compiler state; only common->utf is read, and only when
          SUPPORT_UNICODE is defined (hence the SLJIT_UNUSED_ARG below)
  cc      points to the current opcode

Returns:  pointer to the next opcode, or NULL when the opcode is OP_ANYBYTE
          in UTF mode or is not listed here (the latter also triggers
          SLJIT_UNREACHABLE())

Bracket and assertion opcodes are advanced by their own fixed length only,
so the result points at the first opcode inside the group, not past it.
*/
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
  SLJIT_UNUSED_ARG(common);
  switch(*cc)
    {
    /* Opcodes whose full length is given by the OP_lengths table. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CRSTAR:
    case OP_CRMINSTAR:
    case OP_CRPLUS:
    case OP_CRMINPLUS:
    case OP_CRQUERY:
    case OP_CRMINQUERY:
    case OP_CRRANGE:
    case OP_CRMINRANGE:
    case OP_CRPOSSTAR:
    case OP_CRPOSPLUS:
    case OP_CRPOSQUERY:
    case OP_CRPOSRANGE:
    case OP_CLASS:
    case OP_NCLASS:
    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    case OP_RECURSE:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_KET:
    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_KETRPOS:
    case OP_REVERSE:
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_COND:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    case OP_SCOND:
    case OP_CREF:
    case OP_DNCREF:
    case OP_RREF:
    case OP_DNRREF:
    case OP_FALSE:
    case OP_TRUE:
    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    case OP_PRUNE:
    case OP_SKIP:
    case OP_THEN:
    case OP_COMMIT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    case OP_CLOSE:
    case OP_SKIPZERO:
    return cc + PRIV(OP_lengths)[*cc];

    /* Opcodes that end with a literal character. */
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
    /* The table length covers a single code unit of the character; in UTF
    mode the extra code units of a multi-unit character are skipped too. */
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    return cc;

    /* Special cases. */
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    /* One unit short of the table length. NOTE(review): presumably so that
    the result points at the trailing type opcode, which is then stepped
    over as an opcode of its own - confirm. */
    return cc + PRIV(OP_lengths)[*cc] - 1;

    case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
    /* NULL is returned for this opcode in UTF mode. */
    if (common->utf) return NULL;
#endif
    return cc + 1;

    case OP_CALLOUT_STR:
    /* Variable length: the total length is read from the opcode itself. */
    return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    /* Variable length: the total length is stored right after the opcode. */
    return cc + GET(cc, 1);
#endif

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    case OP_THEN_ARG:
    /* cc[1] gives the variable part of the length. NOTE(review): the fixed
    1 + 2 presumably covers the opcode, the length unit and a terminator
    after the argument - confirm. */
    return cc + 1 + 2 + cc[1];

    default:
    SLJIT_UNREACHABLE();
    return NULL;
    }
}
1067
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1068 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1069 {
1070 int count;
1071 PCRE2_SPTR slot;
1072 PCRE2_SPTR assert_back_end = cc - 1;
1073 PCRE2_SPTR assert_na_end = cc - 1;
1074
1075 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1076 while (cc < ccend)
1077 {
1078 switch(*cc)
1079 {
1080 case OP_SET_SOM:
1081 common->has_set_som = TRUE;
1082 common->might_be_empty = TRUE;
1083 cc += 1;
1084 break;
1085
1086 case OP_REFI:
1087 #ifdef SUPPORT_UNICODE
1088 if (common->iref_ptr == 0)
1089 {
1090 common->iref_ptr = common->ovector_start;
1091 common->ovector_start += 3 * sizeof(sljit_sw);
1092 }
1093 #endif /* SUPPORT_UNICODE */
1094 /* Fall through. */
1095 case OP_REF:
1096 common->optimized_cbracket[GET2(cc, 1)] = 0;
1097 cc += 1 + IMM2_SIZE;
1098 break;
1099
1100 case OP_ASSERT_NA:
1101 case OP_ASSERTBACK_NA:
1102 slot = bracketend(cc);
1103 if (slot > assert_na_end)
1104 assert_na_end = slot;
1105 cc += 1 + LINK_SIZE;
1106 break;
1107
1108 case OP_CBRAPOS:
1109 case OP_SCBRAPOS:
1110 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1111 cc += 1 + LINK_SIZE + IMM2_SIZE;
1112 break;
1113
1114 case OP_COND:
1115 case OP_SCOND:
1116 /* Only AUTO_CALLOUT can insert this opcode. We do
1117 not intend to support this case. */
1118 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1119 return FALSE;
1120 cc += 1 + LINK_SIZE;
1121 break;
1122
1123 case OP_CREF:
1124 common->optimized_cbracket[GET2(cc, 1)] = 0;
1125 cc += 1 + IMM2_SIZE;
1126 break;
1127
1128 case OP_DNREF:
1129 case OP_DNREFI:
1130 case OP_DNCREF:
1131 count = GET2(cc, 1 + IMM2_SIZE);
1132 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1133 while (count-- > 0)
1134 {
1135 common->optimized_cbracket[GET2(slot, 0)] = 0;
1136 slot += common->name_entry_size;
1137 }
1138 cc += 1 + 2 * IMM2_SIZE;
1139 break;
1140
1141 case OP_RECURSE:
1142 /* Set its value only once. */
1143 if (common->recursive_head_ptr == 0)
1144 {
1145 common->recursive_head_ptr = common->ovector_start;
1146 common->ovector_start += sizeof(sljit_sw);
1147 }
1148 cc += 1 + LINK_SIZE;
1149 break;
1150
1151 case OP_CALLOUT:
1152 case OP_CALLOUT_STR:
1153 if (common->capture_last_ptr == 0)
1154 {
1155 common->capture_last_ptr = common->ovector_start;
1156 common->ovector_start += sizeof(sljit_sw);
1157 }
1158 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1159 break;
1160
1161 case OP_ASSERTBACK:
1162 slot = bracketend(cc);
1163 if (slot > assert_back_end)
1164 assert_back_end = slot;
1165 cc += 1 + LINK_SIZE;
1166 break;
1167
1168 case OP_THEN_ARG:
1169 common->has_then = TRUE;
1170 common->control_head_ptr = 1;
1171 /* Fall through. */
1172
1173 case OP_COMMIT_ARG:
1174 case OP_PRUNE_ARG:
1175 if (cc < assert_na_end)
1176 return FALSE;
1177 /* Fall through */
1178 case OP_MARK:
1179 if (common->mark_ptr == 0)
1180 {
1181 common->mark_ptr = common->ovector_start;
1182 common->ovector_start += sizeof(sljit_sw);
1183 }
1184 cc += 1 + 2 + cc[1];
1185 break;
1186
1187 case OP_THEN:
1188 common->has_then = TRUE;
1189 common->control_head_ptr = 1;
1190 cc += 1;
1191 break;
1192
1193 case OP_SKIP:
1194 if (cc < assert_back_end)
1195 common->has_skip_in_assert_back = TRUE;
1196 if (cc < assert_na_end)
1197 return FALSE;
1198 cc += 1;
1199 break;
1200
1201 case OP_SKIP_ARG:
1202 common->control_head_ptr = 1;
1203 common->has_skip_arg = TRUE;
1204 if (cc < assert_back_end)
1205 common->has_skip_in_assert_back = TRUE;
1206 if (cc < assert_na_end)
1207 return FALSE;
1208 cc += 1 + 2 + cc[1];
1209 break;
1210
1211 case OP_PRUNE:
1212 case OP_COMMIT:
1213 case OP_ASSERT_ACCEPT:
1214 if (cc < assert_na_end)
1215 return FALSE;
1216 cc++;
1217 break;
1218
1219 default:
1220 cc = next_opcode(common, cc);
1221 if (cc == NULL)
1222 return FALSE;
1223 break;
1224 }
1225 }
1226 return TRUE;
1227 }
1228
1229 #define EARLY_FAIL_ENHANCE_MAX (1 + 1)
1230
1231 /*
1232 start:
1233 0 - skip / early fail allowed
1234 1 - only early fail with range allowed
1235 >1 - (start - 1) early fail is processed
1236
1237 return: current number of iterators enhanced with fast fail
1238 */
detect_early_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth,int start)1239 static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start)
1240 {
1241 PCRE2_SPTR next_alt;
1242 PCRE2_SPTR end;
1243 PCRE2_SPTR accelerated_start;
1244 int result = 0;
1245 int count;
1246 BOOL fast_forward_allowed = TRUE;
1247
1248 SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1249 SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1250 SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1251
1252 next_alt = cc + GET(cc, 1);
1253 if (*next_alt == OP_ALT)
1254 fast_forward_allowed = FALSE;
1255
1256 do
1257 {
1258 count = start;
1259 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1260
1261 while (TRUE)
1262 {
1263 accelerated_start = NULL;
1264
1265 switch(*cc)
1266 {
1267 case OP_SOD:
1268 case OP_SOM:
1269 case OP_SET_SOM:
1270 case OP_NOT_WORD_BOUNDARY:
1271 case OP_WORD_BOUNDARY:
1272 case OP_EODN:
1273 case OP_EOD:
1274 case OP_CIRC:
1275 case OP_CIRCM:
1276 case OP_DOLL:
1277 case OP_DOLLM:
1278 /* Zero width assertions. */
1279 cc++;
1280 continue;
1281
1282 case OP_NOT_DIGIT:
1283 case OP_DIGIT:
1284 case OP_NOT_WHITESPACE:
1285 case OP_WHITESPACE:
1286 case OP_NOT_WORDCHAR:
1287 case OP_WORDCHAR:
1288 case OP_ANY:
1289 case OP_ALLANY:
1290 case OP_ANYBYTE:
1291 case OP_NOT_HSPACE:
1292 case OP_HSPACE:
1293 case OP_NOT_VSPACE:
1294 case OP_VSPACE:
1295 fast_forward_allowed = FALSE;
1296 cc++;
1297 continue;
1298
1299 case OP_ANYNL:
1300 case OP_EXTUNI:
1301 fast_forward_allowed = FALSE;
1302 if (count == 0)
1303 count = 1;
1304 cc++;
1305 continue;
1306
1307 case OP_NOTPROP:
1308 case OP_PROP:
1309 fast_forward_allowed = FALSE;
1310 cc += 1 + 2;
1311 continue;
1312
1313 case OP_CHAR:
1314 case OP_CHARI:
1315 case OP_NOT:
1316 case OP_NOTI:
1317 fast_forward_allowed = FALSE;
1318 cc += 2;
1319 #ifdef SUPPORT_UNICODE
1320 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1321 #endif
1322 continue;
1323
1324 case OP_TYPESTAR:
1325 case OP_TYPEMINSTAR:
1326 case OP_TYPEPLUS:
1327 case OP_TYPEMINPLUS:
1328 case OP_TYPEPOSSTAR:
1329 case OP_TYPEPOSPLUS:
1330 /* The type or prop opcode is skipped in the next iteration. */
1331 cc += 1;
1332
1333 if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1334 {
1335 accelerated_start = cc - 1;
1336 break;
1337 }
1338
1339 if (count == 0)
1340 count = 1;
1341 fast_forward_allowed = FALSE;
1342 continue;
1343
1344 case OP_TYPEUPTO:
1345 case OP_TYPEMINUPTO:
1346 case OP_TYPEEXACT:
1347 case OP_TYPEPOSUPTO:
1348 cc += IMM2_SIZE;
1349 /* Fall through */
1350
1351 case OP_TYPEQUERY:
1352 case OP_TYPEMINQUERY:
1353 case OP_TYPEPOSQUERY:
1354 /* The type or prop opcode is skipped in the next iteration. */
1355 fast_forward_allowed = FALSE;
1356 if (count == 0)
1357 count = 1;
1358 cc += 1;
1359 continue;
1360
1361 case OP_STAR:
1362 case OP_MINSTAR:
1363 case OP_PLUS:
1364 case OP_MINPLUS:
1365 case OP_POSSTAR:
1366 case OP_POSPLUS:
1367
1368 case OP_STARI:
1369 case OP_MINSTARI:
1370 case OP_PLUSI:
1371 case OP_MINPLUSI:
1372 case OP_POSSTARI:
1373 case OP_POSPLUSI:
1374
1375 case OP_NOTSTAR:
1376 case OP_NOTMINSTAR:
1377 case OP_NOTPLUS:
1378 case OP_NOTMINPLUS:
1379 case OP_NOTPOSSTAR:
1380 case OP_NOTPOSPLUS:
1381
1382 case OP_NOTSTARI:
1383 case OP_NOTMINSTARI:
1384 case OP_NOTPLUSI:
1385 case OP_NOTMINPLUSI:
1386 case OP_NOTPOSSTARI:
1387 case OP_NOTPOSPLUSI:
1388 accelerated_start = cc;
1389 cc += 2;
1390 #ifdef SUPPORT_UNICODE
1391 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1392 #endif
1393 break;
1394
1395 case OP_UPTO:
1396 case OP_MINUPTO:
1397 case OP_EXACT:
1398 case OP_POSUPTO:
1399 case OP_UPTOI:
1400 case OP_MINUPTOI:
1401 case OP_EXACTI:
1402 case OP_POSUPTOI:
1403 case OP_NOTUPTO:
1404 case OP_NOTMINUPTO:
1405 case OP_NOTEXACT:
1406 case OP_NOTPOSUPTO:
1407 case OP_NOTUPTOI:
1408 case OP_NOTMINUPTOI:
1409 case OP_NOTEXACTI:
1410 case OP_NOTPOSUPTOI:
1411 cc += IMM2_SIZE;
1412 /* Fall through */
1413
1414 case OP_QUERY:
1415 case OP_MINQUERY:
1416 case OP_POSQUERY:
1417 case OP_QUERYI:
1418 case OP_MINQUERYI:
1419 case OP_POSQUERYI:
1420 case OP_NOTQUERY:
1421 case OP_NOTMINQUERY:
1422 case OP_NOTPOSQUERY:
1423 case OP_NOTQUERYI:
1424 case OP_NOTMINQUERYI:
1425 case OP_NOTPOSQUERYI:
1426 fast_forward_allowed = FALSE;
1427 if (count == 0)
1428 count = 1;
1429 cc += 2;
1430 #ifdef SUPPORT_UNICODE
1431 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1432 #endif
1433 continue;
1434
1435 case OP_CLASS:
1436 case OP_NCLASS:
1437 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1438 case OP_XCLASS:
1439 accelerated_start = cc;
1440 cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1441 #else
1442 accelerated_start = cc;
1443 cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1444 #endif
1445
1446 switch (*cc)
1447 {
1448 case OP_CRSTAR:
1449 case OP_CRMINSTAR:
1450 case OP_CRPLUS:
1451 case OP_CRMINPLUS:
1452 case OP_CRPOSSTAR:
1453 case OP_CRPOSPLUS:
1454 cc++;
1455 break;
1456
1457 case OP_CRRANGE:
1458 case OP_CRMINRANGE:
1459 case OP_CRPOSRANGE:
1460 cc += 2 * IMM2_SIZE;
1461 /* Fall through */
1462 case OP_CRQUERY:
1463 case OP_CRMINQUERY:
1464 case OP_CRPOSQUERY:
1465 cc++;
1466 if (count == 0)
1467 count = 1;
1468 /* Fall through */
1469 default:
1470 accelerated_start = NULL;
1471 fast_forward_allowed = FALSE;
1472 continue;
1473 }
1474 break;
1475
1476 case OP_ONCE:
1477 case OP_BRA:
1478 case OP_CBRA:
1479 end = cc + GET(cc, 1);
1480
1481 if (*end == OP_KET && PRIVATE_DATA(end) == 0)
1482 {
1483 if (*cc == OP_CBRA)
1484 {
1485 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1486 break;
1487 cc += IMM2_SIZE;
1488 }
1489
1490 cc += 1 + LINK_SIZE;
1491 continue;
1492 }
1493
1494 fast_forward_allowed = FALSE;
1495 if (depth >= 4)
1496 break;
1497
1498 end = bracketend(cc) - (1 + LINK_SIZE);
1499 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1500 break;
1501
1502 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1503 break;
1504
1505 count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
1506 if (count < EARLY_FAIL_ENHANCE_MAX)
1507 {
1508 cc = end + (1 + LINK_SIZE);
1509 continue;
1510 }
1511 break;
1512
1513 case OP_KET:
1514 SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1515 if (cc >= next_alt)
1516 break;
1517 cc += 1 + LINK_SIZE;
1518 continue;
1519 }
1520
1521 if (accelerated_start != NULL)
1522 {
1523 if (count == 0)
1524 {
1525 count++;
1526
1527 if (fast_forward_allowed)
1528 {
1529 common->fast_forward_bc_ptr = accelerated_start;
1530 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1531 *private_data_start += sizeof(sljit_sw);
1532 }
1533 else
1534 {
1535 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1536
1537 if (common->early_fail_start_ptr == 0)
1538 common->early_fail_start_ptr = *private_data_start;
1539
1540 *private_data_start += sizeof(sljit_sw);
1541 common->early_fail_end_ptr = *private_data_start;
1542
1543 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1544 return EARLY_FAIL_ENHANCE_MAX;
1545 }
1546 }
1547 else
1548 {
1549 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1550
1551 if (common->early_fail_start_ptr == 0)
1552 common->early_fail_start_ptr = *private_data_start;
1553
1554 *private_data_start += 2 * sizeof(sljit_sw);
1555 common->early_fail_end_ptr = *private_data_start;
1556
1557 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1558 return EARLY_FAIL_ENHANCE_MAX;
1559 }
1560
1561 count++;
1562
1563 if (count < EARLY_FAIL_ENHANCE_MAX)
1564 continue;
1565 }
1566
1567 break;
1568 }
1569
1570 if (*cc != OP_ALT && *cc != OP_KET)
1571 result = EARLY_FAIL_ENHANCE_MAX;
1572 else if (result < count)
1573 result = count;
1574
1575 cc = next_alt;
1576 next_alt = cc + GET(cc, 1);
1577 }
1578 while (*cc == OP_ALT);
1579
1580 return result;
1581 }
1582
/* Returns 0, 1 or 2 depending on the repeat opcode at cc, which is the
opcode that follows a character class. set_private_data_ptrs() uses the
result as the number of private words to reserve for the class iterator.
Opcodes not listed here (including a missing repeat) yield 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 span;

if (*cc == OP_CRSTAR || *cc == OP_CRPLUS)
  return 2;

if (*cc == OP_CRMINSTAR || *cc == OP_CRMINPLUS || *cc == OP_CRQUERY || *cc == OP_CRMINQUERY)
  return 1;

if (*cc != OP_CRRANGE && *cc != OP_CRMINRANGE)
  return 0;

/* Range repeat: two IMM2 values follow the opcode. */
lower = GET2(cc, 1);
span = GET2(cc, 1 + IMM2_SIZE);

/* A zero second value is treated like the star forms above. */
if (span == 0)
  return (*cc == OP_CRRANGE) ? 2 : 1;

/* Otherwise the difference of the two values, capped at 2. */
span -= lower;
if (span > 2)
  span = 2;
return span;
}
1614
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1615 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1616 {
1617 PCRE2_SPTR end = bracketend(begin);
1618 PCRE2_SPTR next;
1619 PCRE2_SPTR next_end;
1620 PCRE2_SPTR max_end;
1621 PCRE2_UCHAR type;
1622 sljit_sw length = end - begin;
1623 sljit_s32 min, max, i;
1624
1625 /* Detect fixed iterations first. */
1626 if (end[-(1 + LINK_SIZE)] != OP_KET)
1627 return FALSE;
1628
1629 /* Already detected repeat. */
1630 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1631 return TRUE;
1632
1633 next = end;
1634 min = 1;
1635 while (1)
1636 {
1637 if (*next != *begin)
1638 break;
1639 next_end = bracketend(next);
1640 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1641 break;
1642 next = next_end;
1643 min++;
1644 }
1645
1646 if (min == 2)
1647 return FALSE;
1648
1649 max = 0;
1650 max_end = next;
1651 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1652 {
1653 type = *next;
1654 while (1)
1655 {
1656 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1657 break;
1658 next_end = bracketend(next + 2 + LINK_SIZE);
1659 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1660 break;
1661 next = next_end;
1662 max++;
1663 }
1664
1665 if (next[0] == type && next[1] == *begin && max >= 1)
1666 {
1667 next_end = bracketend(next + 1);
1668 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1669 {
1670 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1671 if (*next_end != OP_KET)
1672 break;
1673
1674 if (i == max)
1675 {
1676 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1677 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1678 /* +2 the original and the last. */
1679 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1680 if (min == 1)
1681 return TRUE;
1682 min--;
1683 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1684 }
1685 }
1686 }
1687 }
1688
1689 if (min >= 3)
1690 {
1691 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1692 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1693 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1694 return TRUE;
1695 }
1696
1697 return FALSE;
1698 }
1699
/* Groups of "case" labels for iterator opcodes. set_private_data_ptrs()
reserves one private word for the *_1 groups and two words for the *_2A and
*_2B groups; the *_2B opcodes additionally carry an IMM2 repeat count.
Within each macro the labels are grouped by opcode family. */

/* Single character iterators needing one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINSTARI: \
    case OP_NOTMINSTAR: case OP_NOTMINSTARI: \
    case OP_MINPLUS: case OP_MINPLUSI: \
    case OP_NOTMINPLUS: case OP_NOTMINPLUSI: \
    case OP_QUERY: case OP_QUERYI: \
    case OP_NOTQUERY: case OP_NOTQUERYI: \
    case OP_MINQUERY: case OP_MINQUERYI: \
    case OP_NOTMINQUERY: case OP_NOTMINQUERYI:

/* Single character iterators needing two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_STARI: \
    case OP_NOTSTAR: case OP_NOTSTARI: \
    case OP_PLUS: case OP_PLUSI: \
    case OP_NOTPLUS: case OP_NOTPLUSI:

/* Counted single character iterators needing two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_UPTOI: \
    case OP_NOTUPTO: case OP_NOTUPTOI: \
    case OP_MINUPTO: case OP_MINUPTOI: \
    case OP_NOTMINUPTO: case OP_NOTMINUPTOI:

/* Character type iterators needing one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS:

/* Character type iterators needing two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPEPLUS: case OP_TYPESTAR:

/* Counted character type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEMINUPTO: case OP_TYPEUPTO:
1751
/* Walks the byte code from common->start to ccend and hands out private data
offsets, starting at *private_data_start, to the opcodes which need them:
several bracket types, repeated brackets found by detect_repeat(), and
character / class iterators that are not inside a repeated bracket. The
offsets are stored in common->private_data_ptrs, indexed by the position of
the opcode. On return *private_data_start holds the first unused offset.
The scan stops early once the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* Iterators located before this position get no private slot. */
PCRE2_SPTR repeat_end = NULL;
int next_slot = *private_data_start;
/* words: slots needed by an iterator; advance: distance to the next opcode
(a negative value marks a character iterator, which may be followed by
extra UTF code units); bracket_skip: header length of a bracket. */
int words, advance, bracket_skip;
BOOL check_repeat = TRUE;

while (cc < ccend)
  {
  words = 0;
  advance = 0;
  bracket_skip = 0;
  if (next_slot > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* These brackets are converted to repeats, so no global
     based single character repeat is allowed. */
  if (check_repeat
      && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
      && detect_repeat(common, cc)
      && cc >= repeat_end)
    repeat_end = bracketend(cc);
  check_repeat = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the KET was left there by detect_repeat():
       reserve a word and jump over the copies of the bracket. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = next_slot;
      next_slot += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);
    bracket_skip = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = next_slot;
      next_slot += sizeof(sljit_sw);
      }
    bracket_skip = 1 + LINK_SIZE;
    break;

    /* Brackets without a slot of their own. */
    case OP_BRA:
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracket_skip = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* No repeat detection for the bracket which follows. */
    advance = 1;
    check_repeat = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    words = 1;
    advance = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    words = 2;
    advance = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    words = 2;
    advance = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    words = 1;
    advance = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    advance = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      words = 2;
    break;

    case OP_TYPEUPTO:
    advance = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      words = 2;
    break;

    case OP_TYPEMINUPTO:
    words = 2;
    advance = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode, if any, follows the class data. */
    advance = 1 + 32 / sizeof(PCRE2_UCHAR);
    words = get_class_iterator_size(cc + advance);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    advance = GET(cc, 1);
    words = get_class_iterator_size(cc + advance);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (words > 0 && cc >= repeat_end)
    {
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw) * words;
    }

  if (advance < 0)
    {
    cc += -advance;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }
  else if (advance > 0)
    cc += advance;

  if (bracket_skip > 0)
    {
    if (cc >= repeat_end)
      {
      /* A bracket closed by a plain OP_KET puts no restriction on the
         iterators inside it. */
      repeat_end = bracketend(cc);
      if (repeat_end[-1 - LINK_SIZE] == OP_KET)
        repeat_end = NULL;
      }
    cc += bracket_skip;
    }
  }

*private_data_start = next_slot;
}
1934
1935 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1936 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1937 {
1938 int length = 0;
1939 int possessive = 0;
1940 BOOL stack_restore = FALSE;
1941 BOOL setsom_found = recursive;
1942 BOOL setmark_found = recursive;
1943 /* The last capture is a local variable even for recursions. */
1944 BOOL capture_last_found = FALSE;
1945
1946 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1947 SLJIT_ASSERT(common->control_head_ptr != 0);
1948 *needs_control_head = TRUE;
1949 #else
1950 *needs_control_head = FALSE;
1951 #endif
1952
1953 if (ccend == NULL)
1954 {
1955 ccend = bracketend(cc) - (1 + LINK_SIZE);
1956 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1957 {
1958 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1959 /* This is correct regardless of common->capture_last_ptr. */
1960 capture_last_found = TRUE;
1961 }
1962 cc = next_opcode(common, cc);
1963 }
1964
1965 SLJIT_ASSERT(cc != NULL);
1966 while (cc < ccend)
1967 switch(*cc)
1968 {
1969 case OP_SET_SOM:
1970 SLJIT_ASSERT(common->has_set_som);
1971 stack_restore = TRUE;
1972 if (!setsom_found)
1973 {
1974 length += 2;
1975 setsom_found = TRUE;
1976 }
1977 cc += 1;
1978 break;
1979
1980 case OP_MARK:
1981 case OP_COMMIT_ARG:
1982 case OP_PRUNE_ARG:
1983 case OP_THEN_ARG:
1984 SLJIT_ASSERT(common->mark_ptr != 0);
1985 stack_restore = TRUE;
1986 if (!setmark_found)
1987 {
1988 length += 2;
1989 setmark_found = TRUE;
1990 }
1991 if (common->control_head_ptr != 0)
1992 *needs_control_head = TRUE;
1993 cc += 1 + 2 + cc[1];
1994 break;
1995
1996 case OP_RECURSE:
1997 stack_restore = TRUE;
1998 if (common->has_set_som && !setsom_found)
1999 {
2000 length += 2;
2001 setsom_found = TRUE;
2002 }
2003 if (common->mark_ptr != 0 && !setmark_found)
2004 {
2005 length += 2;
2006 setmark_found = TRUE;
2007 }
2008 if (common->capture_last_ptr != 0 && !capture_last_found)
2009 {
2010 length += 2;
2011 capture_last_found = TRUE;
2012 }
2013 cc += 1 + LINK_SIZE;
2014 break;
2015
2016 case OP_CBRA:
2017 case OP_CBRAPOS:
2018 case OP_SCBRA:
2019 case OP_SCBRAPOS:
2020 stack_restore = TRUE;
2021 if (common->capture_last_ptr != 0 && !capture_last_found)
2022 {
2023 length += 2;
2024 capture_last_found = TRUE;
2025 }
2026 length += 3;
2027 cc += 1 + LINK_SIZE + IMM2_SIZE;
2028 break;
2029
2030 case OP_THEN:
2031 stack_restore = TRUE;
2032 if (common->control_head_ptr != 0)
2033 *needs_control_head = TRUE;
2034 cc ++;
2035 break;
2036
2037 default:
2038 stack_restore = TRUE;
2039 /* Fall through. */
2040
2041 case OP_NOT_WORD_BOUNDARY:
2042 case OP_WORD_BOUNDARY:
2043 case OP_NOT_DIGIT:
2044 case OP_DIGIT:
2045 case OP_NOT_WHITESPACE:
2046 case OP_WHITESPACE:
2047 case OP_NOT_WORDCHAR:
2048 case OP_WORDCHAR:
2049 case OP_ANY:
2050 case OP_ALLANY:
2051 case OP_ANYBYTE:
2052 case OP_NOTPROP:
2053 case OP_PROP:
2054 case OP_ANYNL:
2055 case OP_NOT_HSPACE:
2056 case OP_HSPACE:
2057 case OP_NOT_VSPACE:
2058 case OP_VSPACE:
2059 case OP_EXTUNI:
2060 case OP_EODN:
2061 case OP_EOD:
2062 case OP_CIRC:
2063 case OP_CIRCM:
2064 case OP_DOLL:
2065 case OP_DOLLM:
2066 case OP_CHAR:
2067 case OP_CHARI:
2068 case OP_NOT:
2069 case OP_NOTI:
2070
2071 case OP_EXACT:
2072 case OP_POSSTAR:
2073 case OP_POSPLUS:
2074 case OP_POSQUERY:
2075 case OP_POSUPTO:
2076
2077 case OP_EXACTI:
2078 case OP_POSSTARI:
2079 case OP_POSPLUSI:
2080 case OP_POSQUERYI:
2081 case OP_POSUPTOI:
2082
2083 case OP_NOTEXACT:
2084 case OP_NOTPOSSTAR:
2085 case OP_NOTPOSPLUS:
2086 case OP_NOTPOSQUERY:
2087 case OP_NOTPOSUPTO:
2088
2089 case OP_NOTEXACTI:
2090 case OP_NOTPOSSTARI:
2091 case OP_NOTPOSPLUSI:
2092 case OP_NOTPOSQUERYI:
2093 case OP_NOTPOSUPTOI:
2094
2095 case OP_TYPEEXACT:
2096 case OP_TYPEPOSSTAR:
2097 case OP_TYPEPOSPLUS:
2098 case OP_TYPEPOSQUERY:
2099 case OP_TYPEPOSUPTO:
2100
2101 case OP_CLASS:
2102 case OP_NCLASS:
2103 case OP_XCLASS:
2104
2105 case OP_CALLOUT:
2106 case OP_CALLOUT_STR:
2107
2108 cc = next_opcode(common, cc);
2109 SLJIT_ASSERT(cc != NULL);
2110 break;
2111 }
2112
2113 /* Possessive quantifiers can use a special case. */
2114 if (SLJIT_UNLIKELY(possessive == length))
2115 return stack_restore ? no_frame : no_stack;
2116
2117 if (length > 0)
2118 return length + 1;
2119 return stack_restore ? no_frame : no_stack;
2120 }
2121
/* Emits the code which builds a frame on the stack addressed by STACK_TOP,
starting at stack position "stackpos" and moving towards "stacktop". The byte
code between cc and ccend is scanned (when ccend is NULL, the bracket at cc
is scanned) and the following items are stored:

  - the start of match, the mark and the last capture value, each at most
    once, as two words: the negated offset of the local variable, then its
    current value;
  - every capturing bracket as three words: the OVECTOR offset of the group,
    then the two current ovector values.

A zero word closes the frame. */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
DEFINE_COMPILER;
const int word_size = (int)sizeof(sljit_sw);
BOOL som_saved = FALSE;
BOOL mark_saved = FALSE;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_saved = FALSE;
int ovector_index;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

/* From here on stackpos is an offset relative to STACK_TOP. */
stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* A possessive capturing bracket is itself part of the frame. */
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!som_saved)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= word_size;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= word_size;
      som_saved = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!mark_saved)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= word_size;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= word_size;
      mark_saved = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Save everything which is in use and not saved yet. */
    if (common->has_set_som && !som_saved)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= word_size;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= word_size;
      som_saved = TRUE;
      }
    if (common->mark_ptr != 0 && !mark_saved)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= word_size;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= word_size;
      mark_saved = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_saved)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= word_size;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= word_size;
      capture_last_saved = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_saved)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= word_size;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= word_size;
      capture_last_saved = TRUE;
      }
    /* Two ovector entries belong to each capture group. */
    ovector_index = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(ovector_index));
    stackpos -= word_size;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos -= word_size;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos -= word_size;

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }

/* Closing zero word; the frame must end exactly at stacktop. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
2244
/* Number of temporary registers the delayed memory copy rotates through. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a delayed memory-to-memory copy sequence. Each temporary register
has one entry in every array below; the store of a loaded value is emitted
only when the same register is needed again or when the sequence is finished.
The caller fills in tmp_regs and saved_tmp_regs before calling
delayed_mem_copy_init(). */
typedef struct delayed_mem_copy_status {
  /* Compiler used for emitting the instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store, or -1 if nothing is pending. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Registers which carry the copied values. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Where the original value of a temporary register is kept while it is in
     use; this is only done for virtual registers. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the register used by the next copy. */
  int next_tmp_reg;
} delayed_mem_copy_status;
2255
/* Prepares "status" for a new copy sequence: no store is pending, the first
temporary register is used next and instructions go to common->compiler.
The tmp_regs and saved_tmp_regs arrays must have been filled in already. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int reg = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (reg < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[reg] >= 0);
  /* Only a virtual register needs a separate place for its saved value. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[reg]) < 0 || status->tmp_regs[reg] == status->saved_tmp_regs[reg]);

  /* -1 marks "no pending store". */
  status->store_bases[reg] = -1;
  reg++;
  }
}
2270
/* Queues a copy of one word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the next
temporary register; the store is remembered and emitted when this register
comes round again or when delayed_mem_copy_finish() is called. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[slot] != -1)
  {
  /* Flush the store still pending for this register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

/* Rotate to the next temporary register. */
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2295
/* Emits every store which is still pending, beginning with the register that
would be used next, and restores the virtual registers which were preserved
by delayed_mem_copy_move(). */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;
int reg, saved_reg;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    reg = status->tmp_regs[slot];
    saved_reg = status->saved_tmp_regs[slot];

    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(saved_reg) < 0)
      OP1(SLJIT_MOV, reg, 0, saved_reg, 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}

#undef RECURSE_TMP_REG_COUNT
2321
/* Walks the compiled pattern from cc up to ccend and counts the machine words
that copy_recurse_data() will need for the same range.

The count starts at one (copy_recurse_data() always reserves a first slot for
recursive_head_ptr). Opcodes that own private data, capturing brackets, and
the optional control head / capture last / start-of-match / mark slots add to
it.

Outputs:
  *needs_control_head  TRUE when a control head slot was counted
  *has_quit            TRUE when OP_PRUNE, OP_SKIP, OP_COMMIT, OP_THEN or one
                       of their _ARG forms was seen (OP_MARK alone does not
                       set it)
  *has_accept          TRUE when OP_ACCEPT or OP_ASSERT_ACCEPT was seen

Returns the number of words. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
PCRE2_SPTR alt_end;
int words = 1;
int class_len;
BOOL saw_quit = FALSE;
BOOL saw_accept = FALSE;
BOOL saw_set_som = FALSE;
BOOL saw_mark = FALSE;
BOOL saw_capture_last = FALSE;
BOOL saw_control_head = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
saw_control_head = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    saw_set_som = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch any of the global slots in use. */
    if (common->has_set_som)
      saw_set_som = TRUE;
    if (common->mark_ptr != 0)
      saw_mark = TRUE;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      words++;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      /* The slot after the KET's own one holds an extra skip distance. */
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    words++;
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two ovector words, plus a private word unless the bracket is optimized. */
    words += (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) ? 3 : 2;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    /* Two ovector words and two private words. */
    words += 4;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alt_end = cc + GET(cc, 1);
    if (*alt_end == OP_KETRMAX || *alt_end == OP_KETRMIN)
      words++;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_len = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* The repeat opcode that follows the class decides how many words it needs. */
    if (PRIVATE_DATA(cc) != 0)
      words += get_class_iterator_size(cc + class_len);
    cc += class_len;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    saw_mark = TRUE;
    if (common->control_head_ptr != 0)
      saw_control_head = TRUE;
    if (*cc != OP_MARK)
      saw_quit = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    saw_quit = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    saw_quit = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    saw_quit = TRUE;
    saw_control_head = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    saw_accept = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

if (saw_control_head)
  words++;
if (saw_capture_last)
  words++;
/* Start-of-match and mark only get a slot when a quit opcode was seen. */
if (saw_quit && saw_set_som)
  words++;
if (saw_quit && saw_mark)
  words++;

*needs_control_head = saw_control_head;
*has_quit = saw_quit;
*has_accept = saw_accept;
return words;
}
2539
/* Operating modes of copy_recurse_data(). The frame slots addressed through
SLJIT_SP are the "global" side; the other side is the recursion save area on
the stack. */
enum copy_recurse_data_types {
  /* Frame -> save area, for private, shared and kept shared data. */
  recurse_copy_from_global = 0,
  /* Save area -> frame, private data only. */
  recurse_copy_private_to_global = 1,
  /* Save area -> frame, shared and kept shared data. */
  recurse_copy_shared_to_global = 2,
  /* Save area -> frame, kept shared data only. */
  recurse_copy_kept_shared_to_global = 3,
  /* Exchange frame and save area for private and shared data. */
  recurse_swap_global = 4
};
2547
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,BOOL has_quit)2548 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2549 int type, int stackptr, int stacktop, BOOL has_quit)
2550 {
2551 delayed_mem_copy_status status;
2552 PCRE2_SPTR alternative;
2553 sljit_sw private_srcw[2];
2554 sljit_sw shared_srcw[3];
2555 sljit_sw kept_shared_srcw[2];
2556 int private_count, shared_count, kept_shared_count;
2557 int from_sp, base_reg, offset, i;
2558 BOOL setsom_found = FALSE;
2559 BOOL setmark_found = FALSE;
2560 BOOL capture_last_found = FALSE;
2561 BOOL control_head_found = FALSE;
2562
2563 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2564 SLJIT_ASSERT(common->control_head_ptr != 0);
2565 control_head_found = TRUE;
2566 #endif
2567
2568 switch (type)
2569 {
2570 case recurse_copy_from_global:
2571 from_sp = TRUE;
2572 base_reg = STACK_TOP;
2573 break;
2574
2575 case recurse_copy_private_to_global:
2576 case recurse_copy_shared_to_global:
2577 case recurse_copy_kept_shared_to_global:
2578 from_sp = FALSE;
2579 base_reg = STACK_TOP;
2580 break;
2581
2582 default:
2583 SLJIT_ASSERT(type == recurse_swap_global);
2584 from_sp = FALSE;
2585 base_reg = TMP2;
2586 break;
2587 }
2588
2589 stackptr = STACK(stackptr);
2590 stacktop = STACK(stacktop);
2591
2592 status.tmp_regs[0] = TMP1;
2593 status.saved_tmp_regs[0] = TMP1;
2594
2595 if (base_reg != TMP2)
2596 {
2597 status.tmp_regs[1] = TMP2;
2598 status.saved_tmp_regs[1] = TMP2;
2599 }
2600 else
2601 {
2602 status.saved_tmp_regs[1] = RETURN_ADDR;
2603 if (HAS_VIRTUAL_REGISTERS)
2604 status.tmp_regs[1] = STR_PTR;
2605 else
2606 status.tmp_regs[1] = RETURN_ADDR;
2607 }
2608
2609 status.saved_tmp_regs[2] = TMP3;
2610 if (HAS_VIRTUAL_REGISTERS)
2611 status.tmp_regs[2] = STR_END;
2612 else
2613 status.tmp_regs[2] = TMP3;
2614
2615 delayed_mem_copy_init(&status, common);
2616
2617 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2618 {
2619 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2620
2621 if (!from_sp)
2622 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2623
2624 if (from_sp || type == recurse_swap_global)
2625 delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2626 }
2627
2628 stackptr += sizeof(sljit_sw);
2629
2630 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2631 if (type != recurse_copy_shared_to_global)
2632 {
2633 if (!from_sp)
2634 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2635
2636 if (from_sp || type == recurse_swap_global)
2637 delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2638 }
2639
2640 stackptr += sizeof(sljit_sw);
2641 #endif
2642
2643 while (cc < ccend)
2644 {
2645 private_count = 0;
2646 shared_count = 0;
2647 kept_shared_count = 0;
2648
2649 switch(*cc)
2650 {
2651 case OP_SET_SOM:
2652 SLJIT_ASSERT(common->has_set_som);
2653 if (has_quit && !setsom_found)
2654 {
2655 kept_shared_srcw[0] = OVECTOR(0);
2656 kept_shared_count = 1;
2657 setsom_found = TRUE;
2658 }
2659 cc += 1;
2660 break;
2661
2662 case OP_RECURSE:
2663 if (has_quit)
2664 {
2665 if (common->has_set_som && !setsom_found)
2666 {
2667 kept_shared_srcw[0] = OVECTOR(0);
2668 kept_shared_count = 1;
2669 setsom_found = TRUE;
2670 }
2671 if (common->mark_ptr != 0 && !setmark_found)
2672 {
2673 kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2674 kept_shared_count++;
2675 setmark_found = TRUE;
2676 }
2677 }
2678 if (common->capture_last_ptr != 0 && !capture_last_found)
2679 {
2680 shared_srcw[0] = common->capture_last_ptr;
2681 shared_count = 1;
2682 capture_last_found = TRUE;
2683 }
2684 cc += 1 + LINK_SIZE;
2685 break;
2686
2687 case OP_KET:
2688 if (PRIVATE_DATA(cc) != 0)
2689 {
2690 private_count = 1;
2691 private_srcw[0] = PRIVATE_DATA(cc);
2692 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2693 cc += PRIVATE_DATA(cc + 1);
2694 }
2695 cc += 1 + LINK_SIZE;
2696 break;
2697
2698 case OP_ASSERT:
2699 case OP_ASSERT_NOT:
2700 case OP_ASSERTBACK:
2701 case OP_ASSERTBACK_NOT:
2702 case OP_ASSERT_NA:
2703 case OP_ASSERTBACK_NA:
2704 case OP_ONCE:
2705 case OP_SCRIPT_RUN:
2706 case OP_BRAPOS:
2707 case OP_SBRA:
2708 case OP_SBRAPOS:
2709 case OP_SCOND:
2710 private_count = 1;
2711 private_srcw[0] = PRIVATE_DATA(cc);
2712 cc += 1 + LINK_SIZE;
2713 break;
2714
2715 case OP_CBRA:
2716 case OP_SCBRA:
2717 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2718 shared_srcw[0] = OVECTOR(offset);
2719 shared_srcw[1] = OVECTOR(offset + 1);
2720 shared_count = 2;
2721
2722 if (common->capture_last_ptr != 0 && !capture_last_found)
2723 {
2724 shared_srcw[2] = common->capture_last_ptr;
2725 shared_count = 3;
2726 capture_last_found = TRUE;
2727 }
2728
2729 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2730 {
2731 private_count = 1;
2732 private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2733 }
2734 cc += 1 + LINK_SIZE + IMM2_SIZE;
2735 break;
2736
2737 case OP_CBRAPOS:
2738 case OP_SCBRAPOS:
2739 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2740 shared_srcw[0] = OVECTOR(offset);
2741 shared_srcw[1] = OVECTOR(offset + 1);
2742 shared_count = 2;
2743
2744 if (common->capture_last_ptr != 0 && !capture_last_found)
2745 {
2746 shared_srcw[2] = common->capture_last_ptr;
2747 shared_count = 3;
2748 capture_last_found = TRUE;
2749 }
2750
2751 private_count = 2;
2752 private_srcw[0] = PRIVATE_DATA(cc);
2753 private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2754 cc += 1 + LINK_SIZE + IMM2_SIZE;
2755 break;
2756
2757 case OP_COND:
2758 /* Might be a hidden SCOND. */
2759 alternative = cc + GET(cc, 1);
2760 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2761 {
2762 private_count = 1;
2763 private_srcw[0] = PRIVATE_DATA(cc);
2764 }
2765 cc += 1 + LINK_SIZE;
2766 break;
2767
2768 CASE_ITERATOR_PRIVATE_DATA_1
2769 if (PRIVATE_DATA(cc))
2770 {
2771 private_count = 1;
2772 private_srcw[0] = PRIVATE_DATA(cc);
2773 }
2774 cc += 2;
2775 #ifdef SUPPORT_UNICODE
2776 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2777 #endif
2778 break;
2779
2780 CASE_ITERATOR_PRIVATE_DATA_2A
2781 if (PRIVATE_DATA(cc))
2782 {
2783 private_count = 2;
2784 private_srcw[0] = PRIVATE_DATA(cc);
2785 private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2786 }
2787 cc += 2;
2788 #ifdef SUPPORT_UNICODE
2789 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2790 #endif
2791 break;
2792
2793 CASE_ITERATOR_PRIVATE_DATA_2B
2794 if (PRIVATE_DATA(cc))
2795 {
2796 private_count = 2;
2797 private_srcw[0] = PRIVATE_DATA(cc);
2798 private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2799 }
2800 cc += 2 + IMM2_SIZE;
2801 #ifdef SUPPORT_UNICODE
2802 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2803 #endif
2804 break;
2805
2806 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2807 if (PRIVATE_DATA(cc))
2808 {
2809 private_count = 1;
2810 private_srcw[0] = PRIVATE_DATA(cc);
2811 }
2812 cc += 1;
2813 break;
2814
2815 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2816 if (PRIVATE_DATA(cc))
2817 {
2818 private_count = 2;
2819 private_srcw[0] = PRIVATE_DATA(cc);
2820 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2821 }
2822 cc += 1;
2823 break;
2824
2825 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2826 if (PRIVATE_DATA(cc))
2827 {
2828 private_count = 2;
2829 private_srcw[0] = PRIVATE_DATA(cc);
2830 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2831 }
2832 cc += 1 + IMM2_SIZE;
2833 break;
2834
2835 case OP_CLASS:
2836 case OP_NCLASS:
2837 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2838 case OP_XCLASS:
2839 i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2840 #else
2841 i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2842 #endif
2843 if (PRIVATE_DATA(cc) != 0)
2844 switch(get_class_iterator_size(cc + i))
2845 {
2846 case 1:
2847 private_count = 1;
2848 private_srcw[0] = PRIVATE_DATA(cc);
2849 break;
2850
2851 case 2:
2852 private_count = 2;
2853 private_srcw[0] = PRIVATE_DATA(cc);
2854 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2855 break;
2856
2857 default:
2858 SLJIT_UNREACHABLE();
2859 break;
2860 }
2861 cc += i;
2862 break;
2863
2864 case OP_MARK:
2865 case OP_COMMIT_ARG:
2866 case OP_PRUNE_ARG:
2867 case OP_THEN_ARG:
2868 SLJIT_ASSERT(common->mark_ptr != 0);
2869 if (has_quit && !setmark_found)
2870 {
2871 kept_shared_srcw[0] = common->mark_ptr;
2872 kept_shared_count = 1;
2873 setmark_found = TRUE;
2874 }
2875 if (common->control_head_ptr != 0 && !control_head_found)
2876 {
2877 private_srcw[0] = common->control_head_ptr;
2878 private_count = 1;
2879 control_head_found = TRUE;
2880 }
2881 cc += 1 + 2 + cc[1];
2882 break;
2883
2884 case OP_THEN:
2885 SLJIT_ASSERT(common->control_head_ptr != 0);
2886 if (!control_head_found)
2887 {
2888 private_srcw[0] = common->control_head_ptr;
2889 private_count = 1;
2890 control_head_found = TRUE;
2891 }
2892 cc++;
2893 break;
2894
2895 default:
2896 cc = next_opcode(common, cc);
2897 SLJIT_ASSERT(cc != NULL);
2898 break;
2899 }
2900
2901 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2902 {
2903 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2904
2905 for (i = 0; i < private_count; i++)
2906 {
2907 SLJIT_ASSERT(private_srcw[i] != 0);
2908
2909 if (!from_sp)
2910 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2911
2912 if (from_sp || type == recurse_swap_global)
2913 delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2914
2915 stackptr += sizeof(sljit_sw);
2916 }
2917 }
2918 else
2919 stackptr += sizeof(sljit_sw) * private_count;
2920
2921 if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2922 {
2923 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2924
2925 for (i = 0; i < shared_count; i++)
2926 {
2927 SLJIT_ASSERT(shared_srcw[i] != 0);
2928
2929 if (!from_sp)
2930 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2931
2932 if (from_sp || type == recurse_swap_global)
2933 delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2934
2935 stackptr += sizeof(sljit_sw);
2936 }
2937 }
2938 else
2939 stackptr += sizeof(sljit_sw) * shared_count;
2940
2941 if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2942 {
2943 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2944
2945 for (i = 0; i < kept_shared_count; i++)
2946 {
2947 SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2948
2949 if (!from_sp)
2950 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2951
2952 if (from_sp || type == recurse_swap_global)
2953 delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2954
2955 stackptr += sizeof(sljit_sw);
2956 }
2957 }
2958 else
2959 stackptr += sizeof(sljit_sw) * kept_shared_count;
2960 }
2961
2962 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2963
2964 delayed_mem_copy_finish(&status);
2965 }
2966
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2967 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2968 {
2969 PCRE2_SPTR end = bracketend(cc);
2970 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2971
2972 /* Assert captures then. */
2973 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
2974 current_offset = NULL;
2975 /* Conditional block does not. */
2976 if (*cc == OP_COND || *cc == OP_SCOND)
2977 has_alternatives = FALSE;
2978
2979 cc = next_opcode(common, cc);
2980 if (has_alternatives)
2981 current_offset = common->then_offsets + (cc - common->start);
2982
2983 while (cc < end)
2984 {
2985 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2986 cc = set_then_offsets(common, cc, current_offset);
2987 else
2988 {
2989 if (*cc == OP_ALT && has_alternatives)
2990 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2991 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2992 *current_offset = 1;
2993 cc = next_opcode(common, cc);
2994 }
2995 }
2996
2997 return end;
2998 }
2999
3000 #undef CASE_ITERATOR_PRIVATE_DATA_1
3001 #undef CASE_ITERATOR_PRIVATE_DATA_2A
3002 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3003 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3004 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3005 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3006
is_powerof2(unsigned int value)3007 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3008 {
3009 return (value & (value - 1)) == 0;
3010 }
3011
set_jumps(jump_list * list,struct sljit_label * label)3012 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3013 {
3014 while (list)
3015 {
3016 /* sljit_set_label is clever enough to do nothing
3017 if either the jump or the label is NULL. */
3018 SET_LABEL(list->jump, label);
3019 list = list->next;
3020 }
3021 }
3022
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3023 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3024 {
3025 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3026 if (list_item)
3027 {
3028 list_item->next = *list;
3029 list_item->jump = jump;
3030 *list = list_item;
3031 }
3032 }
3033
/* Records a stub on common->stubs. start is the jump that enters the stub;
a label is created at the current code position and stored as the place the
stub returns to. Nothing is recorded (and no label is created) when the node
cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();

/* Link it in at the head of the list. */
stub->next = common->stubs;
common->stubs = stub;
}
3047
/* Generates the body of every stub recorded by add_stub() and empties the
list. Each stub binds its entry jump to the current position, emits a fast
call that is added to common->stackalloc, and then jumps back to the stub's
recorded return label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3062
count_match(compiler_common * common)3063 static SLJIT_INLINE void count_match(compiler_common *common)
3064 {
3065 DEFINE_COMPILER;
3066
3067 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3068 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3069 }
3070
allocate_stack(compiler_common * common,int size)3071 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3072 {
3073 /* May destroy all locals and registers except TMP2. */
3074 DEFINE_COMPILER;
3075
3076 SLJIT_ASSERT(size > 0);
3077 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3078 #ifdef DESTROY_REGISTERS
3079 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3080 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3081 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3084 #endif
3085 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3086 }
3087
free_stack(compiler_common * common,int size)3088 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3089 {
3090 DEFINE_COMPILER;
3091
3092 SLJIT_ASSERT(size > 0);
3093 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3094 }
3095
/* Allocates size bytes of read only data and chains the allocation onto
common->read_only_data_head.

One extra sljit_uw is allocated in front of the returned area; it stores the
previous list head. Returns NULL when the compiler is already in an error
state, or when the allocation fails (in which case the compiler memory error
is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word is the link to the previously allocated chunk. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

/* The caller gets the area behind the link word. */
return chunk + 1;
}
3115
reset_ovector(compiler_common * common,int length)3116 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3117 {
3118 DEFINE_COMPILER;
3119 struct sljit_label *loop;
3120 sljit_s32 i;
3121
3122 /* At this point we can freely use all temporary registers. */
3123 SLJIT_ASSERT(length > 1);
3124 /* TMP1 returns with begin - 1. */
3125 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3126 if (length < 8)
3127 {
3128 for (i = 1; i < length; i++)
3129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3130 }
3131 else
3132 {
3133 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3134 {
3135 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3136 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3137 loop = LABEL();
3138 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3139 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3140 JUMPTO(SLJIT_NOT_ZERO, loop);
3141 }
3142 else
3143 {
3144 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3145 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3146 loop = LABEL();
3147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3148 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3149 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3150 JUMPTO(SLJIT_NOT_ZERO, loop);
3151 }
3152 }
3153 }
3154
reset_early_fail(compiler_common * common)3155 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3156 {
3157 DEFINE_COMPILER;
3158 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3159 sljit_u32 uncleared_size;
3160 sljit_s32 src = SLJIT_IMM;
3161 sljit_s32 i;
3162 struct sljit_label *loop;
3163
3164 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3165
3166 if (size == sizeof(sljit_sw))
3167 {
3168 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3169 return;
3170 }
3171
3172 if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3173 {
3174 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3175 src = TMP3;
3176 }
3177
3178 if (size <= 6 * sizeof(sljit_sw))
3179 {
3180 for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3181 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3182 return;
3183 }
3184
3185 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3186
3187 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3188
3189 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3190
3191 loop = LABEL();
3192 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3193 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3194 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3195 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3196 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3197
3198 if (uncleared_size >= sizeof(sljit_sw))
3199 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3200
3201 if (uncleared_size >= 2 * sizeof(sljit_sw))
3202 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3203 }
3204
do_reset_match(compiler_common * common,int length)3205 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3206 {
3207 DEFINE_COMPILER;
3208 struct sljit_label *loop;
3209 int i;
3210
3211 SLJIT_ASSERT(length > 1);
3212 /* OVECTOR(1) contains the "string begin - 1" constant. */
3213 if (length > 2)
3214 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3215 if (length < 8)
3216 {
3217 for (i = 2; i < length; i++)
3218 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3219 }
3220 else
3221 {
3222 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3223 {
3224 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3225 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3226 loop = LABEL();
3227 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3228 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3229 JUMPTO(SLJIT_NOT_ZERO, loop);
3230 }
3231 else
3232 {
3233 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3234 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3235 loop = LABEL();
3236 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3237 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3238 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3239 JUMPTO(SLJIT_NOT_ZERO, loop);
3240 }
3241 }
3242
3243 if (!HAS_VIRTUAL_REGISTERS)
3244 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3245 else
3246 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3247
3248 if (common->mark_ptr != 0)
3249 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3250 if (common->control_head_ptr != 0)
3251 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3252 if (HAS_VIRTUAL_REGISTERS)
3253 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3254
3255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3256 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3257 }
3258
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3259 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3260 {
3261 while (current != NULL)
3262 {
3263 switch (current[1])
3264 {
3265 case type_then_trap:
3266 break;
3267
3268 case type_mark:
3269 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3270 return current[3];
3271 break;
3272
3273 default:
3274 SLJIT_UNREACHABLE();
3275 break;
3276 }
3277 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3278 current = (sljit_sw*)current[0];
3279 }
3280 return 0;
3281 }
3282
copy_ovector(compiler_common * common,int topbracket)3283 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3284 {
3285 DEFINE_COMPILER;
3286 struct sljit_label *loop;
3287 BOOL has_pre;
3288
3289 /* At this point we can freely use all registers. */
3290 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3291 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3292
3293 if (HAS_VIRTUAL_REGISTERS)
3294 {
3295 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3296 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3297 if (common->mark_ptr != 0)
3298 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3299 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3301 if (common->mark_ptr != 0)
3302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3303 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3304 SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3305 }
3306 else
3307 {
3308 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3309 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3310 if (common->mark_ptr != 0)
3311 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3312 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3313 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3314 if (common->mark_ptr != 0)
3315 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3316 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3317 }
3318
3319 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3320
3321 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3322 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3323
3324 loop = LABEL();
3325
3326 if (has_pre)
3327 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3328 else
3329 {
3330 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3331 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3332 }
3333
3334 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3335 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3336 /* Copy the integer value to the output buffer */
3337 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3338 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3339 #endif
3340
3341 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3342 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3343
3344 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3345 JUMPTO(SLJIT_NOT_ZERO, loop);
3346
3347 /* Calculate the return value, which is the maximum ovector value. */
3348 if (topbracket > 1)
3349 {
3350 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
3351 {
3352 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3353 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3354
3355 /* OVECTOR(0) is never equal to SLJIT_S2. */
3356 loop = LABEL();
3357 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
3358 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3359 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3360 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3361 }
3362 else
3363 {
3364 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3365 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3366
3367 /* OVECTOR(0) is never equal to SLJIT_S2. */
3368 loop = LABEL();
3369 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3370 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
3371 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3372 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3373 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3374 }
3375 }
3376 else
3377 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3378 }
3379
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3380 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3381 {
3382 DEFINE_COMPILER;
3383 sljit_s32 mov_opcode;
3384 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3385
3386 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3387 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3388 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3389
3390 if (arguments_reg != ARGUMENTS)
3391 OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3392 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3393 common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3394 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3395
3396 /* Store match begin and end. */
3397 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3398 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3399 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3400
3401 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3402
3403 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3404 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3405 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3406 #endif
3407 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3408
3409 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3410 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3411 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3412 #endif
3413 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3414
3415 JUMPTO(SLJIT_JUMP, quit);
3416 }
3417
check_start_used_ptr(compiler_common * common)3418 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3419 {
3420 /* May destroy TMP1. */
3421 DEFINE_COMPILER;
3422 struct sljit_jump *jump;
3423
3424 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3425 {
3426 /* The value of -1 must be kept for start_used_ptr! */
3427 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3428 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3429 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3430 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3432 JUMPHERE(jump);
3433 }
3434 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3435 {
3436 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3438 JUMPHERE(jump);
3439 }
3440 }
3441
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3442 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3443 {
3444 /* Detects if the character has an othercase. */
3445 unsigned int c;
3446
3447 #ifdef SUPPORT_UNICODE
3448 if (common->utf || common->ucp)
3449 {
3450 if (common->utf)
3451 {
3452 GETCHAR(c, cc);
3453 }
3454 else
3455 c = *cc;
3456
3457 if (c > 127)
3458 return c != UCD_OTHERCASE(c);
3459
3460 return common->fcc[c] != c;
3461 }
3462 else
3463 #endif
3464 c = *cc;
3465 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3466 }
3467
char_othercase(compiler_common * common,unsigned int c)3468 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3469 {
3470 /* Returns with the othercase. */
3471 #ifdef SUPPORT_UNICODE
3472 if ((common->utf || common->ucp) && c > 127)
3473 return UCD_OTHERCASE(c);
3474 #endif
3475 return TABLE_GET(c, common->fcc, c);
3476 }
3477
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 if they do not. Otherwise the result is
(position << 8) | mask, where mask is the differing bit reduced to a single
byte and position selects the byte that holds it (presumably a byte index
inside the encoded character - confirm against the callers). The caller must
pass a character which has an other case. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  }
else
  c = *cc;

if (!common->utf && !common->ucp)
  oc = TABLE_GET(c, common->fcc, c);
else if (c > 127)
  oc = UCD_OTHERCASE(c);
else
  oc = common->fcc[c];
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;

/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start from the last byte and step back one byte for every six zero
  bits, since each trailing byte carries six bits of the character. */
  for (n = GET_EXTRALEN(*cc); (bit & 0x3f) == 0; n--)
    bit >>= 6;
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference in the low ten bits is reported with positions 2 and 3,
  a difference above them is shifted down and reported with 0 and 1. */
  if (bit < (1u << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  bit >>= 10;
  }
#endif /* SUPPORT_UNICODE */
if (bit < 256)
  return (0u << 8) | bit;
return (1u << 8) | (bit >> 8);

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3553
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial match has occurred. Nothing is emitted in complete
matching mode. In soft mode hit_start is set to zero, in hard mode control is
transferred to the partial match exit. Unless force is set (or empty partial
matches are allowed) the action is skipped when start_used_ptr >= STR_PTR.
Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force || common->allow_empty_partial)
  {
  /* Only the -1 value of start_used_ptr suppresses the action in soft mode. */
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3583
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Adds jumps to end_reached which are taken when STR_PTR >= STR_END. In the
partial matching modes the partial match is recorded (soft mode) or the
partial match exit is taken (hard mode) first, unless
start_used_ptr >= STR_PTR. Does not affect registers. Usually used in a
tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes start with the same check. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3613
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Adds jumps to backtracks which are taken when STR_PTR >= STR_END. In the
partial matching modes the end of the subject is also processed as a
possible partial match: soft mode sets hit_start to zero before
backtracking, hard mode jumps to the partial match exit. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3646
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match action without testing STR_END. Soft mode sets
hit_start to zero, hard mode jumps to the partial match exit; both do so
only when start_used_ptr < STR_PTR. Nothing is emitted in complete mode. */
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3667
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Jumps to label while STR_PTR < STR_END. When the end of the subject is
reached the code falls through to the partial match processing emitted by
process_partial_match(). */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
process_partial_match(common);
}
3675
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed.

max        - if it is below the first value which needs decoding or checking
             (128 for UTF-8, 0xd800 for UTF-16 and for the 32 bit invalid UTF
             check), only the code unit at STR_PTR is loaded
dst, dstw  - operand which preserves STR_PTR while a decoding helper is called
backtracks - if not NULL, a jump is added to this list in invalid UTF mode,
             which is taken when TMP1 is INVALID_UTF_CHAR (8 and 16 bit) or
             when the value is not a valid code point (32 bit) */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters need no decoding. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  /* The helper is entered with STR_PTR after the first byte, so STR_PTR is
  saved in dst before the call and restored after it. */
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Only code units in the 0xd800-0xdfff range are passed to the helper. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks && common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* Combine with the low surrogate which follows:
    ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  /* Invalid values: 0x110000 and above, and the 0xd800-0xdfff range. */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* Without a backtrack list the invalid value is replaced by
    INVALID_UTF_CHAR. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3759
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

max        - if it is below 128 (UTF-8) or 0xd800 (UTF-16), only the code
             unit before STR_PTR is loaded
backtracks - in invalid UTF mode a jump is added to this list for an invalid
             character; the 8 and 16 bit paths accept NULL here */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters need no decoding. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    /* Combine with the high surrogate which precedes it:
    ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Invalid values: 0x110000 and above, and the 0xd800-0xdfff range.
  NOTE(review): unlike the 8 and 16 bit paths, backtracks is passed to
  add_jump() without a NULL check, and max is not consulted here - confirm
  that every 32 bit caller passes a list. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3827
/* Option bits for the options argument of read_char(). */

/* STR_PTR must be moved after the whole character even when the min / max
range allows read_char() to skip decoding it. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In invalid UTF mode the utfreadnewline_invalid helper is called instead of
utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the bits above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* The invalid UTF decoding path is not used even if common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
3832
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

STR_PTR is always moved after the first code unit. How far it is moved for a
multi unit character depends on min, max and options (READ_CHAR_* bits).
backtracks is used only in invalid UTF mode: if it is not NULL, a jump is added
to the list for an invalid character. TMP2 is destroyed by the decoding paths,
and RETURN_ADDR by those which call a helper or update STR_PTR from a table. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The first byte is enough when only values below 128 are of interest
  and STR_PTR does not need to be moved after the whole character. */
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Every multi byte character is decoded and checked by a helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  /* Bytes below 0xc0 are returned as they are. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only the sequences with a 0xf0-0xf7 first byte are decoded (three
    more bytes). For other first bytes TMP1 is left with the second byte. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    /* utf8_table4 is indexed by first byte - 0xc0; the loaded value is
    added to STR_PTR below. */
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only the sequences with a 0xe0-0xef first byte are decoded (two
    more bytes). */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* No shortcut is available, use the generic decoder. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* The value is not needed, only STR_PTR is moved after the character
    (READ_CHAR_UPDATE_STR_PTR is set, otherwise the function has returned). */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is below 0x800: the first two bytes are combined. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Only code units in the 0xd800-0xdfff range are passed to the helper. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  if (max >= 0x10000)
    {
    /* The precise value is needed, so the surrogate pair is combined. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  /* For a high surrogate (TMP2 < 0x400) STR_PTR is moved by one more code
  unit if requested, and TMP1 is set to 0x10000 when max reaches the
  surrogate range. Both variants below do the same. */
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
    {
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
    if (max >= 0xd800)
      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
    }
  else
    {
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(jump);
    }
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Invalid values: 0x110000 and above, and the 0xd800-0xdfff range. */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* Without a backtrack list the invalid value is replaced by
    INVALID_UTF_CHAR. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4026
4027 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4028
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. This is the case when every byte in the upper half of the 32 byte
bitset (bytes 16..31) has the same fill value: 0xff when nclass is set,
zero otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  if (bitset[i] != expected)
    return FALSE;

return TRUE;
}
4045
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.

If negated is FALSE, only the first byte is consumed. If negated is TRUE, the
remaining bytes of a multi byte character are skipped as well; in invalid UTF
mode the character is decoded by the utfreadchar_invalid helper and a jump is
added to backtracks when it is not valid. Destroys TMP2. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The decoder is entered with the first byte in TMP1 (see read_char and
    peek_char), but TMP1 holds the ctypes value at this point. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The type of a valid character above 127 is zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* utf8_table4 is indexed by first byte - 0xc0; the loaded value is the
    distance to the end of the character. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4080
4081 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4082
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.

The type comes from the ctypes table which has 256 entries, so the type of
every character above 255 is zero. If negated is FALSE, a multi unit character
is decoded only as far as it is needed to find characters below 256. If
negated is TRUE, STR_PTR is moved after the whole character. In invalid UTF
mode jumps are added to backtracks for invalid characters. Destroys TMP2. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Characters below 256 are encoded in two bytes with a 0xc2 or 0xc3
    first byte, so only the second byte is read. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte of a two byte sequence must be in the 0xc2-0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2: ((first - 0xc2) << 6) + second, which is the character value
    for a valid second byte. TMP1: second - 0x80. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be in the 0x80-0xbf range. This check must use
    TMP1: TMP2 holds the combined value, which is at least 0x80 for every
    valid sequence and may be below 0x40 for an invalid one. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The decoder is entered with the first byte in TMP1 (see read_char and
    peek_char), but TMP1 holds the ctypes value at this point. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    /* After a high surrogate (TMP2 < 0x400) STR_PTR is moved by one more
    code unit. Both variants below do the same. */
    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a code unit in the surrogate range must be a high
  surrogate which is followed by a low surrogate inside the subject. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4201
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Emits code that goes one character back. Affects STR_PTR and TMP1. If
must_be_valid is TRUE, TMP2 is not used. Otherwise TMP2 must contain the start
of the subject buffer, and it is destroyed. Does not modify STR_PTR for invalid
character sequences.

Arguments:
  common         compiler state; its utf and invalid_utf flags select the path
  backtracks     when not NULL, the validating paths add a jump to this list
                   which is taken when the previous character is invalid
  must_be_valid  when TRUE, the validating code paths are never emitted */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;
#endif

/* These macros are used as statements, so they are kept below the last
declaration of the function: a declaration following a statement is rejected
by C89/C90 compilers. */
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* Load the previous byte and step over it. Bytes below 0x80 are complete
    characters; everything else is handed to the utfmoveback_invalid helper. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* After the helper call a zero TMP1 is treated as "invalid sequence". */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Valid UTF-8: step back while the byte just passed is a continuation
  byte (10xxxxxx). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Code units outside 0xd800-0xdfff are complete characters; surrogates
    are handed to the utfmoveback_invalid helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* After the helper call a zero TMP1 is treated as "invalid sequence". */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary: TMP1 becomes 1 when the unit just passed
  is in 0xdc00-0xdfff and 0 otherwise, and is then scaled to a byte offset. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values of 0x110000 and above backtrack before STR_PTR is changed. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: subtract one code unit only when the value is below
  0x110000, so STR_PTR stays in place for an invalid character. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

/* Fixed width characters: one code unit back. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4287
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character held in TMP1. A jump is added to
the backtracks list; it is taken when the character is a newline if
jumpifmatch is TRUE, and when it is not a newline otherwise. TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;
sljit_s32 cond;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper is called, and the zero flag is declared
  as the current flag state before the conditional jump is emitted. */
  cond = jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO;
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(cond));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single fixed newline character which fits into one byte. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  cond = jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL;
  add_jump(compiler, backtracks, CMP(cond, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: only CR and NL count as newline characters. */
if (!jumpifmatch)
  {
  /* CR skips the second test; anything which is neither CR nor NL jumps. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4320
4321 #ifdef SUPPORT_UNICODE
4322
4323 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Emits the fast (non-validating) UTF-8 decoder as a fast-call helper.
On entry TMP1 contains the first byte of the character (>= 0xc0). The
character value is returned in TMP1, STR_PTR is advanced over the bytes
which were consumed, and TMP2 is destroyed. No range checks are emitted. */
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP1 = (first byte << 6) | (second byte & 0x3f) */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The length is found by searching for the first zero bit of the first
byte. Its 0x20 bit has been moved to 0x800 by the shift above. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two bytes: clear the 0xc0 marker bits (0xc0 << 6 == 0x3000). */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At least three bytes: merge the third byte the same way. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The 0x10 bit of the first byte is at 0x10000 now. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Three bytes: clear the 0xe0 marker bits (0xe0 << 12 == 0xe0000). */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four bytes: the 0xf0 marker bits (0xf0 << 12 == 0xf0000) are cleared
before the last shift, then the fourth byte is merged. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4368
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the fast UTF-8 "read character type" fast-call helper. On entry TMP2
contains the first byte of the character (>= 0xc0). The value loaded from the
ctypes table is returned in TMP1, or zero when the character is 256 or above.
STR_PTR is advanced over the rest of the character. */
DEFINE_COMPILER;
struct sljit_jump *not_two_byte;
struct sljit_jump *above_255;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* The 0x20 bit of the first byte is clear only for two byte sequences. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
not_two_byte = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: fetch the second byte and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* TMP2 holds the upper 5 payload bits; above 3 the character is >= 256. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte character without a table entry. */
JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Longer sequences: types exist only for characters less than 256, so the
result is zero. STR_PTR is advanced by the utf8_table4 entry of the first
byte (presumably the number of bytes following it - confirm against the
table definition). */
JUMPHERE(not_two_byte);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4404
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the slow (validating) UTF-8 decoder as a fast-call helper. On entry
TMP1 contains the first byte of the character (>= 0xc0). The character value
is returned in TMP1, or INVALID_UTF_CHAR when the sequence is malformed; in
that case STR_PTR is undefined. TMP2 is destroyed.

When the CPU has a conditional move, several checks load a poisoned value
into TMP1 instead of branching; the poisoned value is caught by a later range
check, and the matching bad[] slot is left NULL. */
DEFINE_COMPILER;
sljit_s32 idx;
sljit_s32 use_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *longer;
struct sljit_jump *near_end;
struct sljit_label *three_byte_tail;
struct sljit_label *bad_label;
struct sljit_jump *bad[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Optimistic: usually at least three more bytes are available, so STR_PTR
is moved past them at once and the loads below use negative offsets. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* First byte is outside 0xc2-0xf4: not a valid start, nothing more is read. */
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* The unmasked second byte is added: for a byte in the 0x80-0xbf range its
0x80 bit equals (0x2 << 6), which cancels the extra 2 subtracted above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: only one of the three bytes was consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Three-byte sequence: merge the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!use_cmov)
  bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  /* A bad third byte replaces TMP1 with 0x20000, which takes the three-byte
  path below and fails its final range check. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  bad[2] = NULL;
  }

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Also entered from the "close to the buffer end" code at the bottom. */
three_byte_tail = LABEL();

/* TMP1 is the character plus 0x20000 here. Reject the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (!use_cmov)
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  bad[3] = NULL;
  }
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Values below 0x800 do not need three bytes. */
if (!use_cmov)
  bad[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[4] = NULL;
  }
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Four-byte sequence: merge the fourth byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!use_cmov)
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  /* A bad fourth byte zeroes TMP1, which fails the range check below. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  bad[5] = NULL;
  }

/* Only results in the 0x10000-0x10ffff range are accepted. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (!use_cmov)
  bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  bad[6] = NULL;
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes remained. STR_PTR is moved to one byte after the
entry position and the bounds are checked byte by byte. */
JUMPHERE(near_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
bad[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* Same trick as above: the unmasked byte contributes (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(longer);
bad[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!use_cmov)
  bad[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[10] = NULL;
  }

/* The shared three-byte tail subtracts one from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible here, so anything which is not below
0x30000 falls through to the failure exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Common failure exit. Slots may be NULL when the CMOV form was emitted. */
bad_label = LABEL();
idx = 11;
while (idx-- > 0)
  sljit_set_label(bad[idx], bad_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4563
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the slow UTF-8 decoder specialized for newlines as a fast-call
helper. On entry TMP1 contains the first byte of the character (>= 0xc0).
The value returned in TMP1 is 0x85, 0x2028 or 0x2029 when one of these
newlines was read (only when nltype is NLTYPE_ANY), and INVALID_UTF_CHAR for
everything else, valid or not. STR_PTR is advanced over the bytes which were
consumed. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  /* STR_PTR must advance on every iteration. Without this the loop re-reads
  the same continuation byte forever, and the correction after the loop moves
  STR_PTR backwards. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The byte which terminated the loop is not part of this character. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* NOTE(review): unlike the loop used for NLTYPE_ANY below, this loop does
  not compare STR_PTR with STR_END again - confirm that this is intended. */

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* NLTYPE_ANY: read the second byte when there is one. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only 0xc2 and 0xe2 can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline. TMP2 holds the byte which was read last; when it is not a
continuation byte it is given back. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Exit used when the end of the subject is reached; STR_PTR is kept. */
three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. The second byte must be 0x80
and a third byte must be available. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* A third byte outside 0x80-0xbf is handled by the generic skip code. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* The result is 0x2000 plus the payload of the third byte. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4647
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the validating "go one character back" fast-call helper for UTF-8.
The code compares TMP1 with 0xc0 first and uses TMP2 as the lowest address
which may be read, so TMP1 is expected to hold the byte just stepped over and
TMP2 the start of the subject (confirm against the caller). On return TMP1 is
1 when STR_PTR was moved to the first byte of a multi-byte sequence, and 0
when no sequence was accepted; in the latter case STR_PTR is one byte above
its value on entry. */
DEFINE_COMPILER;
sljit_s32 idx;
struct sljit_jump *longer;
struct sljit_jump *near_start;
struct sljit_label *ok_label;
struct sljit_label *bad_label;
struct sljit_jump *bad[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Optimistic: move three more bytes back at once. A byte which is 0xc0 or
above cannot be the last byte of a sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Fewer than three bytes are available before the last byte. */
near_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

/* Two-byte sequence: the byte before the last one is in 0xc0-0xdf. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. The unsigned compare with -0x40 accepts only the
0x80-0xbf range for the byte which was tested above. */
JUMPHERE(longer);
bad[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence: the previous byte must be in 0x80-0xbf and the first
byte in 0xf0-0xf4. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
bad[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Success exit for the paths which left STR_PTR on the first byte. */
ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Close to the start of the buffer. Two-byte sequence. */
JUMPHERE(near_start);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

bad[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
bad[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
bad[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, ok_label);

/* Four-byte sequences are not possible. */

/* Failure exit for the paths where STR_PTR is three bytes below the value
which must be returned. */
bad_label = LABEL();
sljit_set_label(bad[5], bad_label);
sljit_set_label(bad[6], bad_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(bad[4]);
/* -2 + 4 = 2: cancels the two bytes added on the near_start path, so the
common exit below can add four. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for the paths where STR_PTR is four bytes below the value
which must be returned. */
bad_label = LABEL();
idx = 4;
while (idx-- > 0)
  sljit_set_label(bad[idx], bad_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4743
static void do_utfpeakcharback(compiler_common *common)
{
/* Emits the fast (non-validating) "peek a character back" fast-call helper
for UTF-8. The multi-byte character which ends just before STR_PTR is decoded
into TMP1. STR_PTR is not modified, TMP2 is destroyed. No range or buffer
checks are emitted. */
DEFINE_COMPILER;
struct sljit_jump *two_bytes;
struct sljit_jump *three_bytes;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* The first byte of a two byte sequence is in 0xc0-0xdf. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
two_bytes = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* The first byte of a three byte sequence is in 0xe0-0xef. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
three_bytes = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four bytes: TMP1 is turned into the payload of the second byte and
combined with the payload of the first byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the payload of the byte at offset -2. */
JUMPHERE(three_bytes);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the payload of the last byte. */
JUMPHERE(two_bytes);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4780
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the validating "peek a character back" fast-call helper for UTF-8.
STR_PTR is not modified. The code compares TMP1 with 0xc0 first and uses TMP2
as the lowest address which may be read, so TMP1 is expected to hold the byte
at STR_PTR - 1 and TMP2 the start of the subject (confirm against the
caller). The character is returned in TMP1, or INVALID_UTF_CHAR when the
bytes before STR_PTR do not end with a well formed sequence. TMP2 is
destroyed.

When the CPU has a conditional move, some checks load a poisoned value into
TMP1 instead of branching, and the matching bad[] slot is left NULL. */
DEFINE_COMPILER;
sljit_s32 idx;
sljit_s32 use_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *short_buffer;
struct sljit_jump *longer;
struct sljit_label *two_byte_tail;
struct sljit_label *three_byte_tail;
struct sljit_label *bad_label;
struct sljit_jump *bad[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A last byte of 0xc0 or above is never valid. The fast path needs more
than three bytes between TMP2 and STR_PTR. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
short_buffer = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence: first byte in 0xc2-0xdf. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
longer = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_tail = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* TMP1 still has its 0x80 bit set, which equals (0x2 << 6); adding it
cancels the extra 2 subtracted from the first byte above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -2 must be a continuation byte; collect its payload. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence: first byte in 0xe0-0xef. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_tail = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (!use_cmov)
  bad[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  /* -0xd800 becomes zero after the addition below and fails the next test. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  bad[2] = NULL;
  }

/* Values below 0x800 do not need three bytes. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (!use_cmov)
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[3] = NULL;
  }

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -3 must be a continuation byte; collect its payload. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* Only results in the 0x10000-0x10ffff range are accepted. */
if (!use_cmov)
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  bad[5] = NULL;
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At most three bytes are available. TMP2 is moved to its entry value plus
two for the next bounds test. */
JUMPHERE(short_buffer);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
short_buffer = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Exactly three bytes. Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_tail);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_tail);

/* A four byte sequence does not fit into three bytes. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At most two bytes are available; a single byte is not enough. */
JUMPHERE(short_buffer);
bad[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_tail);

/* Common failure exit. Slots may be NULL when the CMOV form was emitted. */
bad_label = LABEL();
idx = 8;
while (idx-- > 0)
  sljit_set_label(bad[idx], bad_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4912
4913 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4914
4915 #if PCRE2_CODE_UNIT_WIDTH == 16
4916
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the slow (validating) UTF-16 decoder as a fast-call helper. On entry
TMP1 contains the first half of the character (>= 0xd800). The character
value is returned in TMP1, or INVALID_UTF_CHAR when the surrogate pair is
malformed or incomplete; STR_PTR is undefined in that case.

TMP2 must describe the high surrogate. The arithmetic below computes
(TMP2 << 10) + 0x10000 + (second unit - 0xdc00), so TMP2 is presumably the
first unit with 0xd800 already subtracted - confirm against the callers. */
DEFINE_COMPILER;
struct sljit_jump *not_high;
struct sljit_jump *at_end;
struct sljit_jump *not_low;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A first unit of 0xdc00 or above cannot start a pair, and a second unit
must be available. */
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be in the 0xdc00-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
not_low = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit. */
JUMPHERE(not_high);
JUMPHERE(at_end);
JUMPHERE(not_low);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4948
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the slow UTF-16 decoder specialized for newlines as a fast-call
helper. On entry TMP1 contains the first half of the character (>= 0xd800).
The value returned in TMP1 is 0x10000 when a second unit was inspected, and
INVALID_UTF_CHAR when there is no second unit or the first unit is 0xdc00 or
above. STR_PTR is advanced over the second unit only if that unit is in the
0xdc00-0xdfff range. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *at_end;
struct sljit_jump *not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* The second unit is loaded before the first one is checked. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the second unit is in 0xdc00-0xdfff and 0 otherwise;
it is then scaled to a byte offset and added to STR_PTR. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit: STR_PTR is left unchanged. */
JUMPHERE(at_end);
JUMPHERE(not_high);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4980
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the validating "go one character back" fast-call helper for UTF-16.
TMP1 is compared with 0x400 and TMP2 with STR_PTR, so TMP1 is expected to
hold the unit just stepped over with 0xd800 subtracted, and TMP2 the start of
the subject (confirm against the caller). On return TMP1 is 1 when STR_PTR
was moved one more unit back onto a unit in the 0xd800-0xdbff range, and 0
when no pair was accepted; in the latter case STR_PTR is one unit above its
value on entry. */
DEFINE_COMPILER;
struct sljit_jump *not_low;
struct sljit_jump *at_start;
struct sljit_jump *not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Values below 0x400 are rejected, and a preceding unit must exist. */
not_low = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
at_start = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding unit must be in the 0xd800-0xdbff range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit. */
JUMPHERE(not_low);
JUMPHERE(at_start);
JUMPHERE(not_high);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5008
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek one character back (UTF-16, subject may contain invalid UTF).
STR_PTR is never modified. The result is returned in TMP1.

  - If TMP1 is 0xe000 or above on entry, it is returned unchanged.
  - Otherwise TMP2 is first increased by one code unit. The result is
    INVALID_UTF_CHAR if TMP1 is below 0xdc00, if the updated TMP2 is not
    below STR_PTR, or if the code unit two positions before STR_PTR is
    outside 0xd800..0xdbff.
  - Otherwise TMP1 becomes
      0x10000 + (TMP1 - 0xdc00) + ((unit_at_minus_2 - 0xd800) << 10).

NOTE(review): TMP1 presumably holds the code unit just before STR_PTR and
TMP2 the lowest readable position - confirm at the call site. */
DEFINE_COMPILER;
struct sljit_jump *done;
struct sljit_jump *reject[3];
int i;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

done = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
reject[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
reject[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Combine the two halves; TMP2 is reused for the earlier code unit. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
reject[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(done);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
for (i = 0; i < 3; i++)
  JUMPHERE(reject[i]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5040
5041 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5042
/* UCD_BLOCK_SIZE must be 128, that is 1 << UCD_BLOCK_SHIFT (see the assert
below). The mask selects the position of a character inside its block. */
#define UCD_BLOCK_SHIFT 7
#define UCD_BLOCK_MASK ((1 << UCD_BLOCK_SHIFT) - 1)
5046
static void do_getucd(compiler_common *common)
{
/* Emits the out-of-line routine that finds the UCD record index of the
character passed in TMP1, using the two-stage ucd_stage1 / ucd_stage2
tables. The 16-bit value read from ucd_stage2 is returned in TMP2. TMP1 is
overwritten: on return it holds the index that was used for the stage-2
lookup.

In 32-bit non-UTF mode, a value above MAX_UTF_CODE_POINT is replaced by
UNASSIGNED_UTF_CHAR before the lookup. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The replacement character must map to the dummy_ucd_record. */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown && dummy->chartype == ucp_Cn && dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0 && dummy->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2 index: TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* Stage 2: TMP2 = ucd_stage2[TMP1] (16-bit entries, hence the shift of 1). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5086
static void do_getucdtype(compiler_common *common)
{
/* Emits the out-of-line routine that loads the chartype field of the UCD
record belonging to the character passed in TMP1. The chartype byte is
returned in TMP1. TMP2 is clobbered: on return it holds the record index
multiplied by 4.

In 32-bit non-UTF mode, a value above MAX_UTF_CODE_POINT is replaced by
UNASSIGNED_UTF_CHAR before the lookup. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The replacement character must map to the dummy_ucd_record. */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown && dummy->chartype == ucp_Cn && dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0 && dummy->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2 index: TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* Stage 2: TMP2 = ucd_stage2[TMP1], the index of the record. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* A record is 12 bytes long, so the index must be multiplied by 12. This is
done as (index << 2) + ((index << 2) << 1): the first term is added to the
base address explicitly, the second one comes from the scaled index of the
final load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5133
5134 #endif /* SUPPORT_UNICODE */
5135
mainloop_entry(compiler_common * common)5136 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5137 {
5138 DEFINE_COMPILER;
5139 struct sljit_label *mainloop;
5140 struct sljit_label *newlinelabel = NULL;
5141 struct sljit_jump *start;
5142 struct sljit_jump *end = NULL;
5143 struct sljit_jump *end2 = NULL;
5144 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5145 struct sljit_label *loop;
5146 struct sljit_jump *jump;
5147 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5148 jump_list *newline = NULL;
5149 sljit_u32 overall_options = common->re->overall_options;
5150 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5151 BOOL newlinecheck = FALSE;
5152 BOOL readuchar = FALSE;
5153
5154 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5155 && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5156 newlinecheck = TRUE;
5157
5158 SLJIT_ASSERT(common->abort_label == NULL);
5159
5160 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5161 {
5162 /* Search for the end of the first line. */
5163 SLJIT_ASSERT(common->match_end_ptr != 0);
5164 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5165
5166 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5167 {
5168 mainloop = LABEL();
5169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5170 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5171 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5172 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5173 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5174 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5175 JUMPHERE(end);
5176 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5177 }
5178 else
5179 {
5180 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5181 mainloop = LABEL();
5182 /* Continual stores does not cause data dependency. */
5183 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5184 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5185 check_newlinechar(common, common->nltype, &newline, TRUE);
5186 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5187 JUMPHERE(end);
5188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5189 set_jumps(newline, LABEL());
5190 }
5191
5192 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5193 }
5194 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5195 {
5196 /* Check whether offset limit is set and valid. */
5197 SLJIT_ASSERT(common->match_end_ptr != 0);
5198
5199 if (HAS_VIRTUAL_REGISTERS)
5200 {
5201 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5203 }
5204 else
5205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5206
5207 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5208 end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5209 if (HAS_VIRTUAL_REGISTERS)
5210 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5211 else
5212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5213
5214 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5215 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5216 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5217 if (HAS_VIRTUAL_REGISTERS)
5218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5219
5220 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5221 end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5222 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5223 JUMPHERE(end2);
5224 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5225 add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5226 JUMPHERE(end);
5227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5228 }
5229
5230 start = JUMP(SLJIT_JUMP);
5231
5232 if (newlinecheck)
5233 {
5234 newlinelabel = LABEL();
5235 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5236 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5237 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5238 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5239 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5240 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5241 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5242 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5243 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5244 end2 = JUMP(SLJIT_JUMP);
5245 }
5246
5247 mainloop = LABEL();
5248
5249 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5250 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5251 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5252 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5253 if (newlinecheck) readuchar = TRUE;
5254
5255 if (readuchar)
5256 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5257
5258 if (newlinecheck)
5259 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5260
5261 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5262 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5263 #if PCRE2_CODE_UNIT_WIDTH == 8
5264 if (common->invalid_utf)
5265 {
5266 /* Skip continuation code units. */
5267 loop = LABEL();
5268 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5269 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5270 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5271 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5272 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5273 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5274 JUMPHERE(jump);
5275 }
5276 else if (common->utf)
5277 {
5278 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5279 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5280 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5281 JUMPHERE(jump);
5282 }
5283 #elif PCRE2_CODE_UNIT_WIDTH == 16
5284 if (common->invalid_utf)
5285 {
5286 /* Skip continuation code units. */
5287 loop = LABEL();
5288 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5289 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5290 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5291 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5292 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5293 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5294 JUMPHERE(jump);
5295 }
5296 else if (common->utf)
5297 {
5298 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5299
5300 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5301 {
5302 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5303 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5304 CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5305 }
5306 else
5307 {
5308 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5309 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5310 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5311 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5312 }
5313 }
5314 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5315 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5316 JUMPHERE(start);
5317
5318 if (newlinecheck)
5319 {
5320 JUMPHERE(end);
5321 JUMPHERE(end2);
5322 }
5323
5324 return mainloop;
5325 }
5326
5327
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5328 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5329 {
5330 sljit_u32 i, count = chars->count;
5331
5332 if (count == 255)
5333 return;
5334
5335 if (count == 0)
5336 {
5337 chars->count = 1;
5338 chars->chars[0] = chr;
5339
5340 if (last)
5341 chars->last_count = 1;
5342 return;
5343 }
5344
5345 for (i = 0; i < count; i++)
5346 if (chars->chars[i] == chr)
5347 return;
5348
5349 if (count >= MAX_DIFF_CHARS)
5350 {
5351 chars->count = 255;
5352 return;
5353 }
5354
5355 chars->chars[count] = chr;
5356 chars->count = count + 1;
5357
5358 if (last)
5359 chars->last_count++;
5360 }
5361
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive scanner that collects, for each of the leading code unit
positions of the pattern at cc, the set of code units that may appear there.

  common     compiler state (utf / ucp flags, character tables)
  cc         compiled pattern to scan
  chars      per-position records; chars[0] is the first position handled
             by this invocation
  max_chars  maximum number of positions to fill; it is replaced by the
             result of every recursive call made for optional items and
             alternatives
  rec_count  shared work budget, decremented once per processed opcode

Returns the number of positions handled by this invocation, or 0 when the
budget in *rec_count has run out. A position that can hold (nearly) any
code unit is marked by setting its count to 255. */
BOOL stop_after, wildcard, bitmap, icase;
int len, repeat, len_save, k, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bits, cur;
PCRE2_SPTR alt, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
for (;;)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  stop_after = TRUE;
  wildcard = FALSE;
  bitmap = FALSE;
  icase = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    icase = TRUE;
    /* Fall through */
    case OP_CHAR:
    stop_after = FALSE;
    cc++;
    break;

    /* Zero width assertions are simply stepped over. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    cc++;
    continue;

    /* Assertion groups are skipped as a whole. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    icase = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    cc++;
    break;

    case OP_EXACTI:
    icase = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    stop_after = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    icase = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional character: first scan the code that follows it. */
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    stop_after = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every further alternative, then continue with the first one. */
    alt = cc + GET(cc, 1);
    while (*alt == OP_ALT)
      {
      max_chars = scan_prefix(common, alt + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alt += GET(alt, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    bitmap = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    bitmap = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    wildcard = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    wildcard = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    wildcard = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count applies to the item that follows. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    wildcard = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (wildcard)
    {
    /* Each repetition saturates one position. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (bitmap)
    {
    /* The 32 byte bitmap follows the opcode; a repeat opcode may follow it. */
    bits = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;

      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing: scan what follows first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      default:
      break;
      }

    do
      {
      if (bits[31] & 0x80)
        chars->count = 255;
      else
        {
        /* Add every member of the bitmap until the position saturates.
        Bit b of byte k stands for character k * 8 + b. */
        for (k = 0; k < 32 && chars->count != 255; k++)
          {
          chr = k * 8;
          for (cur = bits[k]; cur != 0; cur >>= 1)
            {
            if ((cur & 0x1) != 0)
              add_prefix_char(chr, chars, TRUE);
            chr++;
            }
          }
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Only an exact count allows the scan to continue. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;

      default:
      break;
      }

    repeat = 1;
    continue;
    }

  /* A literal character, which may be several code units long in UTF mode. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (icase && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case must be encoded with the same number of units. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    icase = FALSE;
    othercase[0] = 0; /* Keeps the compiler from warning. */
    }

  len_save = len;
  cc_save = cc;
  for (;;)
    {
    /* One pass per repetition; each code unit fills one position. */
    oc = othercase;
    do
      {
      len--;
      consumed++;

      add_prefix_char(*cc, chars, len == 0);

      if (icase)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (stop_after)
    return consumed;
  }
}
5770
5771 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)5772 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
5773 {
5774 #if PCRE2_CODE_UNIT_WIDTH == 8
5775 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5776 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
5777 #elif PCRE2_CODE_UNIT_WIDTH == 16
5778 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5779 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
5780 #else
5781 #error "Unknown code width"
5782 #endif
5783 }
5784 #endif
5785
5786 #include "pcre2_jit_simd_inc.h"
5787
5788 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5789
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)5790 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
5791 {
5792 sljit_s32 i, j, max_i = 0, max_j = 0;
5793 sljit_u32 max_pri = 0;
5794 PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
5795
5796 for (i = max - 1; i >= 1; i--)
5797 {
5798 if (chars[i].last_count > 2)
5799 {
5800 a1 = chars[i].chars[0];
5801 a2 = chars[i].chars[1];
5802 a_pri = chars[i].last_count;
5803
5804 j = i - max_fast_forward_char_pair_offset();
5805 if (j < 0)
5806 j = 0;
5807
5808 while (j < i)
5809 {
5810 b_pri = chars[j].last_count;
5811 if (b_pri > 2 && a_pri + b_pri >= max_pri)
5812 {
5813 b1 = chars[j].chars[0];
5814 b2 = chars[j].chars[1];
5815
5816 if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
5817 {
5818 max_pri = a_pri + b_pri;
5819 max_i = i;
5820 max_j = j;
5821 }
5822 }
5823 j++;
5824 }
5825 }
5826 }
5827
5828 if (max_pri == 0)
5829 return FALSE;
5830
5831 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
5832 return TRUE;
5833 }
5834
5835 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5836
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
/* Emits code that moves STR_PTR forward to the next start position where the
code unit at distance offset equals char1 or char2 (the two may be equal).

  - If match_end_ptr is in use, STR_END is temporarily lowered to the
    stored end plus offset + 1 code units when that is smaller; the
    original STR_END is kept in TMP3 and restored at the end.
  - If the SIMD implementation is available it does the search.
  - Otherwise a scalar loop is emitted. In PCRE2_JIT_COMPLETE mode running
    into STR_END jumps to failed_match; in the other modes the loop is left
    with STR_PTR unchanged at that point.
  - In UTF mode with a positive offset, a start position that is not the
    start of a character is rejected and the search continues.

A non-zero offset is only allowed in PCRE2_JIT_COMPLETE mode. */
DEFINE_COMPILER;
struct sljit_label *scan;
struct sljit_jump *hit;
struct sljit_jump *at_end;
PCRE2_UCHAR diff;
BOOL limited = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (limited)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limited)
  {
  /* STR_END = min(STR_END, stored end + offset + 1). */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limited)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

scan = LABEL();

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, at_end);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

diff = char1 ^ char2;
if (diff == 0)
  {
  /* A single code unit to look for. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan);
  }
else if (is_powerof2(diff))
  {
  /* The two units differ in one bit: force that bit and compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, scan);
  }
else
  {
  hit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan);
  JUMPHERE(hit);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The start position itself must be the start of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan);
  }
#endif

/* Undo the offset and the increment done after the load. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(at_end);

if (limited)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5922
fast_forward_first_n_chars(compiler_common * common)5923 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
5924 {
5925 DEFINE_COMPILER;
5926 struct sljit_label *start;
5927 struct sljit_jump *match;
5928 fast_forward_char_data chars[MAX_N_CHARS];
5929 sljit_s32 offset;
5930 PCRE2_UCHAR mask;
5931 PCRE2_UCHAR *char_set, *char_set_end;
5932 int i, max, from;
5933 int range_right = -1, range_len;
5934 sljit_u8 *update_table = NULL;
5935 BOOL in_range;
5936 sljit_u32 rec_count;
5937
5938 for (i = 0; i < MAX_N_CHARS; i++)
5939 {
5940 chars[i].count = 0;
5941 chars[i].last_count = 0;
5942 }
5943
5944 rec_count = 10000;
5945 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
5946
5947 if (max < 1)
5948 return FALSE;
5949
5950 /* Convert last_count to priority. */
5951 for (i = 0; i < max; i++)
5952 {
5953 SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
5954
5955 if (chars[i].count == 1)
5956 {
5957 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
5958 /* Simplifies algorithms later. */
5959 chars[i].chars[1] = chars[i].chars[0];
5960 }
5961 else if (chars[i].count == 2)
5962 {
5963 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
5964
5965 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
5966 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
5967 else
5968 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
5969 }
5970 else
5971 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
5972 }
5973
5974 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5975 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
5976 return TRUE;
5977 #endif
5978
5979 in_range = FALSE;
5980 /* Prevent compiler "uninitialized" warning */
5981 from = 0;
5982 range_len = 4 /* minimum length */ - 1;
5983 for (i = 0; i <= max; i++)
5984 {
5985 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
5986 {
5987 range_len = i - from;
5988 range_right = i - 1;
5989 }
5990
5991 if (i < max && chars[i].count < 255)
5992 {
5993 SLJIT_ASSERT(chars[i].count > 0);
5994 if (!in_range)
5995 {
5996 in_range = TRUE;
5997 from = i;
5998 }
5999 }
6000 else
6001 in_range = FALSE;
6002 }
6003
6004 if (range_right >= 0)
6005 {
6006 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6007 if (update_table == NULL)
6008 return TRUE;
6009 memset(update_table, IN_UCHARS(range_len), 256);
6010
6011 for (i = 0; i < range_len; i++)
6012 {
6013 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6014
6015 char_set = chars[range_right - i].chars;
6016 char_set_end = char_set + chars[range_right - i].count;
6017 do
6018 {
6019 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6020 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6021 char_set++;
6022 }
6023 while (char_set < char_set_end);
6024 }
6025 }
6026
6027 offset = -1;
6028 /* Scan forward. */
6029 for (i = 0; i < max; i++)
6030 {
6031 if (range_right == i)
6032 continue;
6033
6034 if (offset == -1)
6035 {
6036 if (chars[i].last_count >= 2)
6037 offset = i;
6038 }
6039 else if (chars[offset].last_count < chars[i].last_count)
6040 offset = i;
6041 }
6042
6043 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6044
6045 if (range_right < 0)
6046 {
6047 if (offset < 0)
6048 return FALSE;
6049 /* Works regardless the value is 1 or 2. */
6050 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6051 return TRUE;
6052 }
6053
6054 SLJIT_ASSERT(range_right != offset);
6055
6056 if (common->match_end_ptr != 0)
6057 {
6058 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6059 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6060 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6061 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6062 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6063 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6064 }
6065 else
6066 {
6067 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6068 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6069 }
6070
6071 SLJIT_ASSERT(range_right >= 0);
6072
6073 if (!HAS_VIRTUAL_REGISTERS)
6074 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6075
6076 start = LABEL();
6077 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6078
6079 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6080 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6081 #else
6082 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6083 #endif
6084
6085 if (!HAS_VIRTUAL_REGISTERS)
6086 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6087 else
6088 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6089
6090 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6091 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6092
6093 if (offset >= 0)
6094 {
6095 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6096 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6097
6098 if (chars[offset].count == 1)
6099 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6100 else
6101 {
6102 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6103 if (is_powerof2(mask))
6104 {
6105 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6106 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6107 }
6108 else
6109 {
6110 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6111 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6112 JUMPHERE(match);
6113 }
6114 }
6115 }
6116
6117 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6118 if (common->utf && offset != 0)
6119 {
6120 if (offset < 0)
6121 {
6122 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6123 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6124 }
6125 else
6126 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6127
6128 jumpto_if_not_utf_char_start(compiler, TMP1, start);
6129
6130 if (offset < 0)
6131 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6132 }
6133 #endif
6134
6135 if (offset >= 0)
6136 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6137
6138 if (common->match_end_ptr != 0)
6139 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6140 else
6141 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6142 return TRUE;
6143 }
6144
fast_forward_first_char(compiler_common * common)6145 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6146 {
6147 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6148 PCRE2_UCHAR oc;
6149
6150 oc = first_char;
6151 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6152 {
6153 oc = TABLE_GET(first_char, common->fcc, first_char);
6154 #if defined SUPPORT_UNICODE
6155 if (first_char > 127 && (common->utf || common->ucp))
6156 oc = UCD_OTHERCASE(first_char);
6157 #endif
6158 }
6159
6160 fast_forward_first_char2(common, first_char, oc, 0);
6161 }
6162
/* Emits code that moves STR_PTR forward until it is positioned just after a
newline sequence, or until the end limit is reached. Nothing is scanned when
STR_PTR is at or before the 'str' position stored in jit_arguments. When
common->match_end_ptr is set, STR_END is temporarily replaced by the value
kept in that stack slot (the original is parked in TMP3 and restored before
leaving). Uses TMP1, TMP2 and, in that case, TMP3. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  /* Save the real end in TMP3 and scan only up to the stored match end. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Fixed newline made of two code units (value above 255): compare the two
code units directly instead of going through read_char(). */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (HAS_VIRTUAL_REGISTERS)
    {
    /* ARGUMENTS is copied to TMP1 first and then used as the base register. */
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step STR_PTR back by one code unit, but only when it is at least two
  code units past 'begin': TMP2 becomes 0 or 1 and is scaled to bytes. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance one code unit at a time until the two code units preceding
  STR_PTR equal the high and low bytes of common->newline. */
  loop = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

  /* All three early exits land here. */
  JUMPHERE(quit);
  JUMPHERE(firstchar);
  JUMPHERE(lastchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* Every other newline convention. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));

/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
move_back(common, NULL, FALSE);

/* The loop label is also published in common->ff_newline_shortcut. */
loop = LABEL();
common->ff_newline_shortcut = loop;

read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR gets its own exit so that a following LF can be consumed below. */
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in 'newline' are sent back to the loop head;
presumably they are taken when TMP1 is not a newline - confirm against
check_newlinechar(). */
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  quit = JUMP(SLJIT_JUMP);
  JUMPHERE(foundcr);
  /* After a CR: if the next code unit is NL, skip it as well. TMP1 becomes
  0 or 1 (scaled to bytes for wide code units) and is added to STR_PTR. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }
JUMPHERE(lastchar);
JUMPHERE(firstchar);

/* Restore the real subject end. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6264
6265 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6266
/* Emits a scan loop that advances STR_PTR until the code unit it points to
is one whose bit is set in the pattern's start bitmap
(common->re->start_bitmap). On exit from the loop STR_PTR points at that
code unit. If the end limit is reached, complete-match mode jumps to
common->failed_match, while the other modes simply fall out of the loop.
Uses TMP1, TMP2, TMP3 (without virtual registers) and RETURN_ADDR (when
common->match_end_ptr is set). */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  /* Park the real STR_END in RETURN_ADDR and clamp STR_END to the stored
  match end plus one code unit when that is smaller. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Load the current code unit and step past it; the step is undone after
the loop. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Prefer a compact compare sequence; bit 255 of the bitmap is passed as the
nclass argument. Fall back to a bitmap lookup when no compact form exists. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Values of 255 and above share bit 255: either all of them are accepted
  or all of them restart the loop. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* TMP2 = bit index within the byte, TMP1 = bitmap byte for this value. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    }
  /* Bit clear: not a possible starting code unit, keep scanning. */
  JUMPTO(SLJIT_ZERO, start);
  }
else
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Undo the increment so STR_PTR addresses the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6338
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6339 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6340 {
6341 DEFINE_COMPILER;
6342 struct sljit_label *loop;
6343 struct sljit_jump *toolong;
6344 struct sljit_jump *already_found;
6345 struct sljit_jump *found;
6346 struct sljit_jump *found_oc = NULL;
6347 jump_list *not_found = NULL;
6348 sljit_u32 oc, bit;
6349
6350 SLJIT_ASSERT(common->req_char_ptr != 0);
6351 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6352 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6353 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6354 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6355
6356 if (has_firstchar)
6357 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6358 else
6359 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6360
6361 oc = req_char;
6362 if (caseless)
6363 {
6364 oc = TABLE_GET(req_char, common->fcc, req_char);
6365 #if defined SUPPORT_UNICODE
6366 if (req_char > 127 && (common->utf || common->ucp))
6367 oc = UCD_OTHERCASE(req_char);
6368 #endif
6369 }
6370
6371 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6372 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6373 {
6374 not_found = fast_requested_char_simd(common, req_char, oc);
6375 }
6376 else
6377 #endif
6378 {
6379 loop = LABEL();
6380 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6381
6382 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6383
6384 if (req_char == oc)
6385 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6386 else
6387 {
6388 bit = req_char ^ oc;
6389 if (is_powerof2(bit))
6390 {
6391 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6392 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6393 }
6394 else
6395 {
6396 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6397 found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6398 }
6399 }
6400 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6401 JUMPTO(SLJIT_JUMP, loop);
6402
6403 JUMPHERE(found);
6404 if (found_oc)
6405 JUMPHERE(found_oc);
6406 }
6407
6408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6409
6410 JUMPHERE(already_found);
6411 JUMPHERE(toolong);
6412 return not_found;
6413 }
6414
/* Emits the fast-call helper that pops saved values off the backtracking
stack and writes them back into local variables. The word just below
STACK_TOP selects the record type:
  > 0  offset of a local (relative to the local base); the next two stack
       words are restored to that local and the one after it, and three
       words are popped;
  < 0  negated offset of a local; one stack word is restored and two words
       are popped;
  = 0  end marker: the helper returns through RETURN_ADDR.
Uses TMP1, TMP2 and, without virtual registers, TMP3. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 holds the base address of the locals. */
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive offset: restore a pair of words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
else
  {
  /* Go through registers instead of memory-to-memory moves. TMP1 is used as
  scratch here, so the local base is reloaded before looping. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* TMP2 <= 0 here, so "not zero" is equivalent to "negative". */
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative offset: negate it and restore a single word. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6468
/* Emits the fast-call helper that tests for a word boundary at STR_PTR.
The return address is kept in the LOCALS0 stack slot. TMP3 receives 1 when
the character before STR_PTR is a word character (0 otherwise, or when
STR_PTR is at or before jit_arguments 'begin'); TMP2 receives the same for
the character at STR_PTR (0 at the end of the subject). On the normal return
path TMP2 is TMP2 XOR TMP3 and the zero flag reflects that result. When
invalid UTF handling is enabled there are two extra return paths, see the
end of the function. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

/* The ctype lookups below shift right by 4 to isolate the word bit. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* No previous character: TMP3 stays 0. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* STR_PTR is preserved in TMP2 around the move back that is needed for
    check_start_used_ptr(). */
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  /* Underscore counts as a word character (TMP2 preset to 1). Otherwise
  the value produced by the getucdtype helper is tested against the ranges
  ucp_Ll..ucp_Lu and ucp_Nd..ucp_No; the helper is assumed to leave the
  type in TMP1 - confirm against its definition. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Table lookup; values above 255 skip it and leave TMP3 at 0. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; TMP2 stays 0 at the end of the subject. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Re-entry point used by the invalid UTF path below. */
valid_utf = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Normal return: the result is the XOR of the two word flags. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* Target of the jumps collected by the backward peek (presumably taken
  when the preceding sequence is not valid UTF - confirm against
  peek_char_back()). If the character at STR_PTR reads as something other
  than INVALID_UTF_CHAR, classification resumes at valid_utf with TMP3
  still 0; otherwise the helper returns with TMP2 set to -1. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* Target of the jumps collected by the forward peek: return the word
  flag of the previous character in TMP2. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6627
/* Tries to replace a 256-bit class bitmap test of the character in TMP1 by
a few compare instructions.

Arguments:
  common      compiler state
  bits        the 32-byte class bitmap
  nclass      selects whether a boundary at 256 is added after the last bit
              (presumably TRUE means code points above 255 are in the
              class - confirm against callers)
  invert      when TRUE the sense of the test is flipped
  backtracks  list that receives the emitted jumps; they are taken when the
              character is not accepted

Returns: TRUE when code was emitted, FALSE when the bitmap has too many
         transitions (nothing has been emitted in that case).

The bitmap is reduced to the list of code points at which the bit value
changes; at most four such boundaries are supported. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

/* Collect the boundaries. A whole byte equal to 'all' contains no
transition and is skipped in one step. */
for (i = 0; i < 256; )
  {
  byte = i >> 3;
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Add a final boundary at 256 when the value of bit 255 does not continue
beyond the bitmap: last bit 0 with nclass set, or last bit 1 without it. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From here on 'bit' is the (possibly inverted) value of the region that
starts at code point 0; the regions alternate at each boundary. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* A single boundary: one compare decides. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One interval [ranges[0], ranges[1]): a subtract plus an unsigned compare,
  or a plain equality test when the interval holds a single code point. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    /* Rejected: everything from ranges[2] up, and [ranges[0], ranges[1]). */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  /* Rejected: everything below ranges[0], and [ranges[1], ranges[2]). */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two intervals of equal length whose start points differ in exactly one
  bit: OR that bit into TMP1 and test a single interval. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* Rejected: [ranges[0], ranges[1]) and [ranges[2], ranges[3]). 'i'
    remembers how much has already been subtracted from TMP1. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* Rejected: everything outside [ranges[0], ranges[3]) (one unsigned
  compare after the subtract), and the gap [ranges[1], ranges[2]). */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
6779
/* Tries to test the character in TMP1 against a class bitmap by comparing
it with a short list of individual code points, using conditional moves.

The bitmap (complemented first when nclass is set) is expanded into a list of
at most MAX_CLASS_CHARS_SIZE entries. Two listed code points that differ only
in bit 0x20 share one entry, which is tagged with 0x100 and tested after
OR-ing 0x20 into TMP1. TMP2 ends up non-zero when one of the listed code
points was seen, and a jump on TMP2 is appended to backtracks; its sense
depends on nclass and invert.

Returns FALSE, without emitting anything, when conditional moves are not
available, when the list would be too long, or when it would be empty. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t members[MAX_CLASS_CHARS_SIZE];
uint8_t pending;
sljit_s32 cond;
int byte_idx, bit_idx, pos, idx, ch;
int count = 0;
int paired = 0;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

for (byte_idx = 0; byte_idx < 32; byte_idx++)
  {
  pending = nclass ? (uint8_t)~bits[byte_idx] : bits[byte_idx];

  for (bit_idx = 0; pending != 0; pending >>= 1, bit_idx++)
    {
    if ((pending & 0x1) == 0)
      continue;

    ch = byte_idx * 8 + bit_idx;

    if ((ch & 0x20) != 0)
      {
      /* Look for the partner that has bit 0x20 clear. */
      pos = 0;
      while (pos < count && members[pos] != ch - 0x20)
        pos++;

      if (pos < count)
        {
        members[pos] |= 0x120;
        continue;
        }
      }

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    members[count++] = (uint16_t) ch;
    }
  }

if (count == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */

/* The conditional moves copy TMP1 into TMP2 on a hit, which cannot signal a
hit for code point zero, so that one is handled with a flag instead. */
idx = 0;
if (members[0] == 0)
  {
  idx = 1;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* First pass: entries that stand for a single code point. */
for (; idx < count; idx++)
  {
  if ((members[idx] & 0x100) != 0)
    {
    paired++;
    continue;
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, members[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Second pass: tagged entries, compared after forcing bit 0x20 on. */
if (paired != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((members[idx] & 0x100) == 0)
      continue;

    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, members[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

cond = (invert ? !nclass : nclass) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(cond, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
6880
/* Emits a compact test of the character in TMP1 against a class bitmap when
one is available: the range-based form is tried first and the character-list
form is the fallback. Returns TRUE when either form was emitted, FALSE when
neither applies. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
if (!optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return optimize_class_chars(common, bits, nclass, invert, backtracks);

return TRUE;
}
6888
/* Emits the fast-call helper that classifies the character in TMP1 as a
newline character. The result is left in TMP2 (non-zero for a newline) with
the zero flag set accordingly. TMP1 is modified as well. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d: rebase to 0x0a and do one unsigned range compare. All later
constants are written relative to 0x0a for the same reason. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* 0x85; its flag is merged into TMP2 by the next OP_FLAGS that runs. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* 0x2028 and 0x2029: setting the lowest bit maps both rebased values onto
  0x2029 - 0x0a, so one compare covers the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last compare and expose the final result in the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6915
/* Emits the fast-call helper that classifies the character in TMP1 as a
horizontal space character. The result is left in TMP2 (non-zero for a
horizontal space) with the zero flag set accordingly. TMP1 is modified in
the Unicode / wide code unit part. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x09, 0x20 and 0xa0 are always tested. The flag of each compare is
merged into TMP2 by the OP_FLAGS that follows it. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase to 0x2000: the range 0x2000..0x200A needs one unsigned compare,
  and the remaining constants are written relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last compare and expose the final result in the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6954
/* Emits the fast-call helper that classifies the character in TMP1 as a
vertical space character. The result is left in TMP2 (non-zero for a
vertical space) with the zero flag set accordingly. TMP1 is modified as
well. The emitted sequence is the same as the one in check_anynewline(). */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d: rebase to 0x0a and do one unsigned range compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* 0x85; its flag is merged into TMP2 by the next OP_FLAGS that runs. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* 0x2028 and 0x2029: setting the lowest bit maps both rebased values onto
  0x2029 - 0x0a, so one compare covers the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last compare and expose the final result in the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6982
static void do_casefulcmp(compiler_common *common)
{
/* Emits the helper that compares two code unit sequences exactly.

As the emitted code shows: TMP1 addresses the first sequence, STR_PTR is moved
back by TMP2 so that it addresses the second one, and TMP2 is the remaining
length, reduced by IN_UCHARS(1) for every matching pair. The loop is left
either on the first unequal pair (before TMP2 is decremented for it) or when
TMP2 reaches zero. The return address is kept in LOCALS0 and reloaded into
TMP1 for the fast return, so TMP1 does not survive the call. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int lhs_reg;
int rhs_reg;
int spilled;
/* 0: plain load + explicit add, 1: post-update load, 2: pre-update load. */
int step_mode = 0;

if (!HAS_VIRTUAL_REGISTERS)
  {
  lhs_reg = TMP3;
  rhs_reg = RETURN_ADDR;
  }
else
  {
  lhs_reg = STR_END;
  rhs_reg = STACK_TOP;
  }

/* When STR_END / STACK_TOP are borrowed as scratch registers their values
are parked in TMP3 / RETURN_ADDR and restored before returning. */
spilled = (lhs_reg == STR_END);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (spilled)
  {
  OP1(SLJIT_MOV, TMP3, 0, lhs_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, rhs_reg, 0);
  }

/* Ask (SLJIT_MEM_SUPP) which address-updating load form is available. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  step_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  step_mode = 2;

if (step_mode == 2)
  {
  /* The pre-update form starts one unit before the data. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

if (step_mode != 0)
  {
  sljit_emit_mem(compiler, MOV_UCHAR | (step_mode == 1 ? SLJIT_MEM_POST : SLJIT_MEM_PRE), lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | (step_mode == 1 ? SLJIT_MEM_POST : SLJIT_MEM_PRE), rhs_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  OP1(MOV_UCHAR, lhs_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, rhs_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

mismatch = CMP(SLJIT_NOT_EQUAL, lhs_reg, 0, rhs_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (step_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (spilled)
  {
  OP1(SLJIT_MOV, lhs_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, rhs_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7062
static void do_caselesscmp(compiler_common *common)
{
/* Emits the helper that compares two code unit sequences after mapping each
unit through the common->lcc table.

As the emitted code shows: TMP1 addresses the first sequence, STR_PTR is moved
back by TMP2 so that it addresses the second one, and TMP2 is the remaining
length, reduced by IN_UCHARS(1) per matching pair. When the code unit width
is not 8, units above 255 skip the table lookup and are compared unchanged.
STR_END is used as a scratch register and saved in LOCALS1; the return
address lives in LOCALS0 and is reloaded into TMP1 for the fast return. */
DEFINE_COMPILER;
struct sljit_jump *skip;
struct sljit_label *loop;
int lhs_reg = STR_END;
int rhs_reg;
int table_reg;
int spilled;
/* 0: plain load + explicit add, 1: post-update load, 2: pre-update load. */
int step_mode = 0;

if (!HAS_VIRTUAL_REGISTERS)
  {
  rhs_reg = RETURN_ADDR;
  table_reg = TMP3;
  }
else
  {
  rhs_reg = STACK_TOP;
  table_reg = STACK_LIMIT;
  }

/* When STACK_TOP / STACK_LIMIT are borrowed, their values are parked in
TMP3 / RETURN_ADDR and restored before returning. */
spilled = (rhs_reg == STACK_TOP);

/* Ask (SLJIT_MEM_SUPP) which address-updating load form is available. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  step_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  step_mode = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, lhs_reg, 0);

if (spilled)
  {
  OP1(SLJIT_MOV, TMP3, 0, rhs_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, table_reg, 0);
  }

OP1(SLJIT_MOV, table_reg, 0, SLJIT_IMM, common->lcc);

if (step_mode == 2)
  {
  /* The pre-update form starts one unit before the data. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

if (step_mode != 0)
  {
  sljit_emit_mem(compiler, MOV_UCHAR | (step_mode == 1 ? SLJIT_MEM_POST : SLJIT_MEM_PRE), lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | (step_mode == 1 ? SLJIT_MEM_POST : SLJIT_MEM_PRE), rhs_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* STR_PTR is advanced further below, after the table lookups. */
  OP1(MOV_UCHAR, lhs_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, rhs_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

#if PCRE2_CODE_UNIT_WIDTH != 8
skip = CMP(SLJIT_GREATER, lhs_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, lhs_reg, 0, SLJIT_MEM2(table_reg, lhs_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
skip = CMP(SLJIT_GREATER, rhs_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, rhs_reg, 0, SLJIT_MEM2(table_reg, rhs_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
#endif

if (step_mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

skip = CMP(SLJIT_NOT_EQUAL, lhs_reg, 0, rhs_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(skip);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (step_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (spilled)
  {
  OP1(SLJIT_MOV, rhs_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, table_reg, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, lhs_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7160
/* Emits the comparison of one pattern character against the subject.

Arguments:
  common      compiler state
  caseless    TRUE when the character is to be matched caselessly
  cc          points to the first code unit of the pattern character
  context     running state shared by consecutive calls: remaining length
              (subtracted from STR_PTR to address the subject), the register
              that receives the next load (sourcereg, -1 before the first
              call) and, in the unaligned-read build, the units collected so
              far (c / oc / ucharptr)
  backtracks  list that receives the jump taken on a mismatch

Returns:      cc advanced past the code units that were consumed (more than
              one when SUPPORT_UNICODE is defined, common->utf is set and the
              first unit announces extra units)
*/
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. The value returned above packs the
  index of the code unit that differs (upper bits) together with the
  differing bit itself (low 8 bits). */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 selects the upper byte of the code unit. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this sequence: preload the first chunk into TMP1. TMP2
  becomes the target of the next load, so loads and compares alternate
  between the two registers. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

/* One iteration per code unit of the character. */
do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. */
  /* Collect the expected unit (with the case bit forced on when this is the
  unit that differs) and the matching OR mask. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush once a full chunk has been collected or the sequence has ended. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Start loading the next chunk into the free register first ... */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    /* ... then switch to the register that was loaded previously and
    compare that one. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* Load the next unit into the free register, then compare the unit held
  in the other register. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7313
7314 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7315
/* Re-bias the value held in typereg so that it equals (type - value), and
record the new bias in typeoffset. Nothing is emitted when the bias is already
the requested one. The caller's scope must provide typereg and typeoffset.
Wrapped in do { } while (0) so that the macro behaves as a single statement
(safe inside an unbraced if / else); note that value is evaluated more than
once. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
7325
/* Re-bias the character held in TMP1 so that it equals (character - value),
and record the new bias in charoffset. Nothing is emitted when the bias is
already the requested one. The caller's scope must provide charoffset.
Wrapped in do { } while (0) so that the macro behaves as a single statement
(safe inside an unbraced if / else); note that value is evaluated more than
once. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7335
/* Forward declaration: compile_xclass_matchingpath() below calls this
function when it meets an XCL_PROP item with the PT_ANY property. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7337
/* Emits the matching code for an extended character class.

Arguments:
  common      compiler state
  cc          points to the flag unit of the class (XCL_NOT, XCL_MAP and
              XCL_HASPROP bits). When XCL_MAP is set a 32 byte bitmap for the
              characters below 256 follows; after that comes the list of
              XCL_SINGLE / XCL_RANGE / XCL_PROP / XCL_NOTPROP items, which is
              terminated by XCL_END
  backtracks  list that receives the jumps taken when the character is not
              accepted by the class

The item list is scanned first to find the smallest and largest character
that can match and to learn which Unicode data (character type, script, the
character itself) is needed. The character is then read with read_char()
(defined elsewhere; the code below expects it in TMP1), the optional bitmap
is tested, the Unicode record fields are fetched, and finally one test is
emitted per item.

For a class without XCL_NOT, a matching item jumps to the local "found" list
and only the last item jumps to backtracks (with an inverted condition). For
a class with XCL_NOT, every matching item jumps to backtracks.

To keep immediates small, TMP1 and typereg may hold biased values; the
current biases are tracked in charoffset and typeoffset and are changed with
SET_CHAR_OFFSET() / SET_TYPE_OFFSET(). */
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

#ifdef SUPPORT_UNICODE
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif /* SUPPORT_UNICODE */

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  /* The bitmap covers characters from zero; skip over it. */
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set lists its members explicitly, so the range of
      interesting characters can still be bounded. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may match any character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* A positive "any" makes the whole item list accept every character:
      emit an OP_ALLANY match and, for a negated class, an unconditional
      backtrack. A negated "any" contributes nothing. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */
  }
SLJIT_ASSERT(compares > 0);

/* This code can be reached outside UTF mode as well, even in 8 bit mode. */
cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
  {
#ifdef SUPPORT_UNICODE
  read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
#else /* !SUPPORT_UNICODE */
  read_char(common, min, max, NULL, 0);
#endif /* SUPPORT_UNICODE */
  }

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 skip the bitmap test and go to the item tests;
    characters up to 255 are decided completely by the bitmap. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is c >> 3, bit index is c & 7. */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* No bitmap and no properties: anything outside [min, max] cannot be
    matched by any item, so decide such characters right away. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so keep a copy of the character. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  charsaved = TRUE;
#endif /* SUPPORT_UNICODE */
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (needstype || needsscript)
  {
  /* The record lookup below overwrites TMP1; save the character if an item
  needs it later and it has not been saved already. */
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values beyond the last code point are looked up as an unassigned
    character. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  /* Two stage table lookup; TMP2 receives the index of the record. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    /* Record offset = index * 8 + index * 4. TMP2 keeps index * 4. */
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));

    ccbegin = cc;

    /* Walk the whole item list and emit the script tests only; the other
    items are skipped here and handled by the main loop below. */
    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;

    if (needstype)
      {
      /* TMP2 has already been shifted by 2 */
      if (!needschar)
        {
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

        OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        }
      else
        {
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

        /* The character returns to TMP1, the type goes to RETURN_ADDR. */
        OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
        OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        typereg = RETURN_ADDR;
        }
      }
    else if (needschar)
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    }
  else if (needstype)
    {
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);

    if (!needschar)
      {
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      }
    else
      {
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

      /* The character returns to TMP1, the type goes to RETURN_ADDR. */
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  else if (needschar)
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  }
#endif /* SUPPORT_UNICODE */

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif /* SUPPORT_UNICODE */

while (*cc != XCL_END)
  {
  compares--;
  /* The last item of a non-negated class jumps to backtracks when it does
  NOT match, hence the inverted condition. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);

    /* Up to four consecutive character / range tests are collected in TMP2
    and resolved with a single conditional jump. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    /* Bias TMP1 by the start of the range so that one unsigned
    LESS_EQUAL test checks both ends. */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Script items were emitted (and counted) before this loop; undo
      the decrement done at the top of this iteration. */
      compares++;
      /* Do nothing. */
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD the underscore result is already in TMP2. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2.
      Two characters that differ in a single bit are tested together by
      forcing that bit on and comparing with the larger one (which has the
      bit set). The test needs the real character, so any bias currently
      applied to TMP1 is added back into TMP2 first. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The pair tested here is [1] / [2], so the bit that is forced on
          must be their difference (the same mask as in the unbiased path
          above), not the difference of [0] / [1]. */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* Remaining members are tested one by one; the zero flag is only
      requested for the last of them. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      /* 0xa0 .. 0xd7ff, and everything from 0xe000 upwards. */
      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the excluded characters here. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the excluded characters here. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      /* Symbols only count when the character is at most 0x7f. */
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */

  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

/* Every "matched" jump of a non-negated class lands here. */
if (found != NULL)
  set_jumps(found, LABEL());
}
7930
7931 #undef SET_TYPE_OFFSET
7932 #undef SET_CHAR_OFFSET
7933
7934 #endif
7935
/* Emits the matching path code for one "simple assertion" opcode. The opcodes
handled here are OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY,
OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE. Any
other opcode reaches SLJIT_UNREACHABLE().

Arguments:
  common      the compiler state
  type        the opcode to compile
  cc          points to the data following the opcode; only OP_REVERSE reads
                it (GET(cc, 0) is the number of characters to step back)
  backtracks  list that receives every jump taken when the assertion fails

Returns:      cc for all opcodes except OP_REVERSE, which returns
                cc + LINK_SIZE
*/
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
struct sljit_jump *jump[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *label;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_SOD:
  /* Fails unless STR_PTR equals the "begin" field of jit_arguments. With
  virtual registers ARGUMENTS is first copied into TMP1, which is then used
  as the base register of the load. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Same shape as OP_SOD, but compares against the "str" field. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The test itself is done by the shared routine reached through the
  common->wordboundary jump list (fast call). */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* In invalid UTF mode the result is examined in TMP2: \B fails when it is
    non-zero, \b fails when it is less than or equal to zero (signed). */
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  /* Otherwise the zero flag is used: the compiler is told that the Z flag is
  valid after the fast call, and the backtrack jump depends on it. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. */
  /* jump[0] is taken when STR_PTR is already at (or past) STR_END, which
  skips all of the newline checks below. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two code unit newline: TMP2 = STR_PTR + 2 units, TMP1 = the
    first code unit at STR_PTR. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* Exactly two code units remain: go and compare both of them. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 = (STR_PTR + 2 < STR_END) | (TMP1 != first newline unit).
      A non-zero result is a plain failure. */
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      /* Only one code unit is left and it equals the first newline unit:
      emit the partial match check, then fail. */
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Fixed single code unit newline: it must be the last code unit of the
    subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Non-fixed newline types. A leading CR is handled first; jump[1] is
    taken when the current code unit is not CR. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    /* STR_PTR + 2 > STR_END: the CR is the last code unit, accepted. */
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal. */
    /* Exactly two code units remain: the second one must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* Not CR: the only remaining possibility is a final NL. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is saved in TMP3 because read_char advances it; it is
      restored once the character has been accepted as a newline by the
      shared routine reached through common->anynewline. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  /* Fails while any subject code unit remains. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fails immediately when PCRE2_NOTEOL is set in the run-time options. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));

  /* Without the "endonly" setting the remaining test is the OP_EODN test,
  emitted by a recursive call; otherwise only the very end is accepted. */
  if (!common->endonly)
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* jump[1] is taken when STR_PTR is before STR_END; the code that follows
  directly handles the "at end of subject" case. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  check_partial(common, FALSE);
  /* At the end with PCRE2_NOTEOL clear: accepted, skip the newline test. */
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Not at the end: a newline must start at STR_PTR. */
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      /* jump[1] is taken when both newline code units are available. */
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CIRC:
  /* Fails when STR_PTR is after the "begin" field of jit_arguments, or when
  PCRE2_NOTBOL is set in the run-time options. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
    }
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back. */
  /* TMP2 holds the "begin" pointer. jump[1] is taken when STR_PTR is after
  it; the code that follows directly handles the "at the start" case. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  /* At the start with PCRE2_NOTBOL clear: accepted. */
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Not at the start: a newline must end just before STR_PTR. Unless
  common->alt_circumflex is set, the position at STR_END is rejected. */
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two code unit newline: both preceding units must exist (not before
    the "begin" pointer in TMP2) and must match. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_REVERSE:
  /* Moves STR_PTR back by "length" characters, failing if that would go
  before the "begin" pointer. A zero length emits no code. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    /* In UTF mode the step is done one character at a time: TMP3 counts
    down from length, and each round fails if STR_PTR has reached begin. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    /* Fixed width characters: a single subtraction and bounds check. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
SLJIT_UNREACHABLE();
return cc;
}
8210
8211 #ifdef SUPPORT_UNICODE
8212
8213 #if PCRE2_CODE_UNIT_WIDTH != 32
8214
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8215 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8216 {
8217 PCRE2_SPTR start_subject = args->begin;
8218 PCRE2_SPTR end_subject = args->end;
8219 int lgb, rgb, ricount;
8220 PCRE2_SPTR prevcc, endcc, bptr;
8221 BOOL first = TRUE;
8222 uint32_t c;
8223
8224 prevcc = cc;
8225 endcc = NULL;
8226 do
8227 {
8228 GETCHARINC(c, cc);
8229 rgb = UCD_GRAPHBREAK(c);
8230
8231 if (first)
8232 {
8233 lgb = rgb;
8234 endcc = cc;
8235 first = FALSE;
8236 continue;
8237 }
8238
8239 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8240 break;
8241
8242 /* Not breaking between Regional Indicators is allowed only if there
8243 are an even number of preceding RIs. */
8244
8245 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8246 {
8247 ricount = 0;
8248 bptr = prevcc;
8249
8250 /* bptr is pointing to the left-hand character */
8251 while (bptr > start_subject)
8252 {
8253 bptr--;
8254 BACKCHAR(bptr);
8255 GETCHAR(c, bptr);
8256
8257 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8258 break;
8259
8260 ricount++;
8261 }
8262
8263 if ((ricount & 1) != 0) break; /* Grapheme break required */
8264 }
8265
8266 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8267 allows any number of them before a following Extended_Pictographic. */
8268
8269 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8270 lgb != ucp_gbExtended_Pictographic)
8271 lgb = rgb;
8272
8273 prevcc = endcc;
8274 endcc = cc;
8275 }
8276 while (cc < end_subject);
8277
8278 return endcc;
8279 }
8280
8281 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8282
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8283 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8284 {
8285 PCRE2_SPTR start_subject = args->begin;
8286 PCRE2_SPTR end_subject = args->end;
8287 int lgb, rgb, ricount;
8288 PCRE2_SPTR prevcc, endcc, bptr;
8289 BOOL first = TRUE;
8290 uint32_t c;
8291
8292 prevcc = cc;
8293 endcc = NULL;
8294 do
8295 {
8296 GETCHARINC_INVALID(c, cc, end_subject, break);
8297 rgb = UCD_GRAPHBREAK(c);
8298
8299 if (first)
8300 {
8301 lgb = rgb;
8302 endcc = cc;
8303 first = FALSE;
8304 continue;
8305 }
8306
8307 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8308 break;
8309
8310 /* Not breaking between Regional Indicators is allowed only if there
8311 are an even number of preceding RIs. */
8312
8313 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8314 {
8315 ricount = 0;
8316 bptr = prevcc;
8317
8318 /* bptr is pointing to the left-hand character */
8319 while (bptr > start_subject)
8320 {
8321 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8322
8323 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8324 break;
8325
8326 ricount++;
8327 }
8328
8329 if ((ricount & 1) != 0)
8330 break; /* Grapheme break required */
8331 }
8332
8333 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8334 allows any number of them before a following Extended_Pictographic. */
8335
8336 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8337 lgb != ucp_gbExtended_Pictographic)
8338 lgb = rgb;
8339
8340 prevcc = endcc;
8341 endcc = cc;
8342 }
8343 while (cc < end_subject);
8344
8345 return endcc;
8346 }
8347
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8348 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8349 {
8350 PCRE2_SPTR start_subject = args->begin;
8351 PCRE2_SPTR end_subject = args->end;
8352 int lgb, rgb, ricount;
8353 PCRE2_SPTR bptr;
8354 uint32_t c;
8355
8356 /* Patch by PH */
8357 /* GETCHARINC(c, cc); */
8358 c = *cc++;
8359
8360 #if PCRE2_CODE_UNIT_WIDTH == 32
8361 if (c >= 0x110000)
8362 return NULL;
8363 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8364 lgb = UCD_GRAPHBREAK(c);
8365
8366 while (cc < end_subject)
8367 {
8368 c = *cc;
8369 #if PCRE2_CODE_UNIT_WIDTH == 32
8370 if (c >= 0x110000)
8371 break;
8372 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8373 rgb = UCD_GRAPHBREAK(c);
8374
8375 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8376 break;
8377
8378 /* Not breaking between Regional Indicators is allowed only if there
8379 are an even number of preceding RIs. */
8380
8381 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8382 {
8383 ricount = 0;
8384 bptr = cc - 1;
8385
8386 /* bptr is pointing to the left-hand character */
8387 while (bptr > start_subject)
8388 {
8389 bptr--;
8390 c = *bptr;
8391 #if PCRE2_CODE_UNIT_WIDTH == 32
8392 if (c >= 0x110000)
8393 break;
8394 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8395
8396 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8397
8398 ricount++;
8399 }
8400
8401 if ((ricount & 1) != 0)
8402 break; /* Grapheme break required */
8403 }
8404
8405 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8406 allows any number of them before a following Extended_Pictographic. */
8407
8408 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8409 lgb != ucp_gbExtended_Pictographic)
8410 lgb = rgb;
8411
8412 cc++;
8413 }
8414
8415 return cc;
8416 }
8417
8418 #endif /* SUPPORT_UNICODE */
8419
/* Emits the matching path code for an opcode that matches exactly one
character (or, for OP_ANYNL and OP_EXTUNI, one newline sequence or one
extended grapheme cluster). The opcodes handled here are the character type
opcodes (OP_[NOT_]DIGIT, OP_[NOT_]WHITESPACE, OP_[NOT_]WORDCHAR, OP_ANY,
OP_ALLANY, OP_ANYBYTE, OP_ANYNL, OP_[NOT_]HSPACE, OP_[NOT_]VSPACE), the
Unicode opcodes (OP_PROP, OP_NOTPROP, OP_EXTUNI), the literal opcodes
(OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI) and the class opcodes (OP_CLASS,
OP_NCLASS, OP_XCLASS). Any other opcode reaches SLJIT_UNREACHABLE().

Arguments:
  common         the compiler state
  type           the opcode to compile
  cc             points to the data following the opcode
  backtracks     list that receives every jump taken when the match fails
  check_str_ptr  when TRUE, an end-of-subject check is emitted before the
                   character is read (detect_partial_match in most cases)

Returns:         pointer to the pattern code after the data of the opcode
*/
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR propdata[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  /* In UTF-8 mode the 7-bit reader is used when is_char7_bitset accepts the
  digit bitmap that precedes common->ctypes. */
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  /* TMP1 is tested against the ctype_digit bit. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same shape as the digit case, using the space bitmap and ctype bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same shape as the digit case, using the word bitmap and ctype bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  /* Reads one character and fails if a newline starts there. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two code unit newline: jump[0] is taken when the character just read
    is not the first newline unit. Otherwise the next code unit is examined,
    unless the end of the subject has been reached (end_list). */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    if (common->invalid_utf)
      {
      /* With invalid UTF allowed, the generic reader is used. */
      read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
      return cc;
      }

#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
    /* Valid UTF: the character is skipped without being decoded. The first
    code unit is loaded and STR_PTR is advanced past it. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* For a first byte of 0xc0 or above, the byte loaded from utf8_table4
    (indexed by first byte - 0xc0) is added to STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* For a first unit of 0xd800 or above, TMP1 becomes 1 when
    (unit & 0xfc00) == 0xd800 and 0 otherwise; shifted left by one it is
    added to STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    JUMPHERE(jump[0]);
    return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
    }
#endif /* SUPPORT_UNICODE */
  /* Otherwise a character is a single code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Always advances by exactly one code unit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_NOTPROP:
  case OP_PROP:
  /* A property test is compiled by building a small extended class in
  propdata (flags, XCL_PROP or XCL_NOTPROP, the two data units of the opcode,
  XCL_END) and passing it to the extended class compiler. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  /* jump[0] is taken when the character is not CR. After a CR, a following
  NL is consumed as well if there is one. */
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  /* Not CR: the character must be a newline of the \R newline type. */
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* The two calls differ only in their last two arguments: the negated form
  passes the backtrack list and READ_CHAR_UPDATE_STR_PTR. */
  if (type == OP_NOT_HSPACE)
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0x9, 0x3000, NULL, 0);

  /* The test is done by the shared routine reached through common->hspace;
  its result is taken from the zero flag. */
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  /* Same shape as the horizontal space case, with the range 0xa..0x2029
  and the shared routine reached through common->vspace. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_VSPACE)
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0xa, 0x2029, NULL, 0);

  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* The grapheme cluster is consumed by a C helper. The assertion documents
  the register layout relied on: ARGUMENTS is moved into R0 (TMP1), and
  STR_PTR is already R1, so the two call arguments are in place. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  /* Only the invalid UTF helper can return NULL here. */
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  /* A zero return value is treated as a failed match. */
  if (!common->utf || common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

  /* The helper returns the new subject position. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  /* length is the number of code units of the literal character. */
  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  /* Caseful characters, characters without another case, and characters
  for which char_get_othercase_bit returns non-zero are handled by the code
  unit sequence comparer. */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    /* STR_PTR is advanced first; the bounds check is needed for multi-unit
    characters, or when the caller asked for a check in complete mode. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Remaining case: a caseless character whose two cases are compared as
  decoded character values. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  oc = char_othercase(common, c);
  /* The reader is given the smaller and the larger of the two values. */
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(c ^ oc));

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* With conditional move: a character equal to oc is replaced by c, so a
    single comparison against c covers both cases. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
    JUMPHERE(jump[0]);
    }
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    c = *cc;
    if (c < 128 && !common->invalid_utf)
      {
      /* Valid UTF-8 and a pattern character below 128: only the first byte
      of the subject character needs to be compared. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      /* Decode the pattern character and record its length in code units. */
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    c = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    /* Only one value to reject. */
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    /* Two values to reject. When they differ in a single bit, that bit is
    set in the subject character and one comparison is enough. */
    oc = char_othercase(common, c);
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  /* cc points to a 32-byte bitmap; the return value skips it. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  /* "bit" is the largest character value passed to the reader: 127 when
  UTF is on and is_char7_bitset accepts the bitmap, 255 otherwise. */
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  if (type == OP_NCLASS)
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, bit, NULL, 0);
#else
  if (type == OP_NCLASS)
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, 255, NULL, 0);
#endif

  /* When optimize_class returns TRUE no bitmap lookup is emitted here. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap range: OP_CLASS fails for them, while for
  OP_NCLASS jump[0] skips the bitmap test. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap test: TMP2 = 1 << (c & 7), TMP1 = byte at cc[c >> 3]; the match
  fails when the selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  case OP_XCLASS:
  /* The class data starts after the LINK_SIZE length field; GET(cc, 0) is
  used to compute the position after the whole item. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
SLJIT_UNREACHABLE();
return cc;
}
8824
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)8825 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
8826 {
8827 /* This function consumes at least one input character. */
8828 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
8829 DEFINE_COMPILER;
8830 PCRE2_SPTR ccbegin = cc;
8831 compare_context context;
8832 int size;
8833
8834 context.length = 0;
8835 do
8836 {
8837 if (cc >= ccend)
8838 break;
8839
8840 if (*cc == OP_CHAR)
8841 {
8842 size = 1;
8843 #ifdef SUPPORT_UNICODE
8844 if (common->utf && HAS_EXTRALEN(cc[1]))
8845 size += GET_EXTRALEN(cc[1]);
8846 #endif
8847 }
8848 else if (*cc == OP_CHARI)
8849 {
8850 size = 1;
8851 #ifdef SUPPORT_UNICODE
8852 if (common->utf)
8853 {
8854 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8855 size = 0;
8856 else if (HAS_EXTRALEN(cc[1]))
8857 size += GET_EXTRALEN(cc[1]);
8858 }
8859 else
8860 #endif
8861 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8862 size = 0;
8863 }
8864 else
8865 size = 0;
8866
8867 cc += 1 + size;
8868 context.length += IN_UCHARS(size);
8869 }
8870 while (size > 0 && context.length <= 128);
8871
8872 cc = ccbegin;
8873 if (context.length > 0)
8874 {
8875 /* We have a fixed-length byte sequence. */
8876 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
8877 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8878
8879 context.sourcereg = -1;
8880 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8881 context.ucharptr = 0;
8882 #endif
8883 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
8884 return cc;
8885 }
8886
8887 /* A non-fixed length character will be checked if length == 0. */
8888 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
8889 }
8890
/* Forward declarations. */
8892 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
8893 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8894
/* Allocates a backtrack record of "size" bytes with sljit_alloc_memory(),
zero-fills it, and pushes it onto the parent's list: the record is stored in
the local variable "backtrack", its prev field is set to the old parent->top,
its cc field to "ccstart", and parent->top is updated to point to it.

If the compiler is in an error state after the allocation, the ENCLOSING
FUNCTION returns "error" immediately. The macro expects the identifiers
"compiler", "backtrack" and "parent" to be in scope where it is used. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8907
/* Same as PUSH_BACKTRACK, for use inside functions returning void: when the
compiler is in an error state after the allocation, the enclosing function
returns without a value. Expects "compiler", "backtrack" and "parent" to be
in scope where it is used. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8920
/* Views the local "backtrack" pointer as a pointer to the given, more
specific backtrack structure type. No checking is done by the cast. */
#define BACKTRACK_AS(type) ((type *)backtrack)
8922
/* Emits the code that picks the capture group used by a duplicate-name back
reference (OP_DNREF / OP_DNREFI at cc). The name table entries belonging to
the name are tried in order; the first one whose OVECTOR start value differs
from OVECTOR(1) is selected. The address of the selected OVECTOR pair is left
in TMP2 (the OVECTOR offset goes to TMP2).

When every entry but the last compares equal to OVECTOR(1), TMP2 refers to
the last entry. In that case, if "backtracks" is not NULL and
common->unset_backref is not set, a jump to "backtracks" is emitted for the
situation where the last entry equals OVECTOR(1) as well. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int offset;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the value every candidate start position is compared with. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries except the final one: leave the search as soon as one differs. */
remaining--;
for (; remaining > 0; remaining--)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* Final entry: it is the result whatever its value is. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
8952
/* Emits the code that matches the text captured by a back reference at the
current subject position.

Arguments:
  common      compiler state
  cc          points to the OP_REF, OP_REFI, OP_DNREF or OP_DNREFI opcode
  backtracks  list that receives the jumps taken when the match fails
  withchecks  when TRUE, emit the tests for an unset group (numbered
              references only, and only if common->unset_backref is not set)
              and for an empty captured string
  emptyfail   together with withchecks: an empty captured string is a
              failure; otherwise an empty captured string matches without
              any comparison

For OP_DNREF / OP_DNREFI the address of the selected OVECTOR pair must be in
TMP2 on entry (compile_dnref_search leaves it there).

The comparison routine is selected from the caseless-ness of the opcode:
OP_REF and OP_DNREF use common->casefulcmp, OP_REFI and OP_DNREFI use
common->caselesscmp. (Previously only OP_REF selected the caseful routine, so
a case-sensitive duplicate-name reference was compared caselessly.) */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
jump_list **cmp_calls = caseless ? &common->caselesscmp : &common->casefulcmp;
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the captured string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* NOTE(review): the character-by-character Unicode path below is entered for
OP_REFI only. OP_DNREFI in UTF mode still goes through common->caselesscmp;
extending this test would also require common->iref_ptr to be reserved for
OP_DNREFI, which is decided outside this function - confirm before changing. */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the captured string. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their current values are
  parked in the iref_ptr locals and restored on every exit path. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  /* "jump": the whole captured string has been consumed, i.e. success. */
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  /* "partial": the subject ran out before the captured string did. */
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. STR_PTR is
  temporarily redirected to the captured string so that read_char() can be
  used; the subject position is kept in TMP3 meanwhile. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character (from the subject). */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = &ucd_records[TMP2]: the index is scaled by 12 (4 * index +
  8 * index) before the table address is added. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Subject character + other_case field compared with the reference
  character. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A caseset field of zero means there is nothing more to try. Otherwise
  walk the ucd_caseless_sets entries starting at that index: equal to the
  reference character means a match, a smaller entry means keep scanning,
  anything else falls through to no_match. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Failure exit. In complete-match mode running out of subject is an
  ordinary failure as well. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  /* Partial-match modes: running out of subject goes through
  check_partial() before backtracking. */
  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the captured string (end - start); the zero flag is
  set when it is empty. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance over the whole reference first, then check the subject end. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* The compare routine leaves a non-zero TMP2 on mismatch. */
  add_jump(compiler, cmp_calls, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* The reference is longer than the remaining subject: compare only the
    part that is available, then report through check_partial(). */
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, cmp_calls, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty captured string (only reached on the non-Unicode path): either a
failure or a successful zero-length match that skips the comparison. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9117
/* Emits the matching path for a back reference that is followed by a repeat
quantifier: cc points to OP_REF / OP_REFI / OP_DNREF / OP_DNREFI and the
quantifier opcode (OP_CRSTAR ... OP_CRMINRANGE) follows its operands.

A ref_iterator_backtrack record is pushed onto parent; its matchingpath label
is set here. Returns the pattern position after the quantifier, or NULL when
the backtrack record could not be allocated.

Within this function min/max describe the repeat count; max == 0 is used for
"no upper limit". */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_UCHAR type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* The duplicate-name opcodes carry one more IMM2 operand than the numbered
ones. Shifting cc by that amount lets the quantifier offsets below be the
same for both forms; ccbegin keeps the real opcode position. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The minimizing (lazy) quantifier opcodes are the odd ones. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* ---- Maximizing (greedy) repeat ---- */
if (!minimize)
  {
  if (min == 0)
    {
    /* Two slots: the current STR_PTR (the zero-repeat position) and a zero
    entry. */
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat,
    is zero the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* NULL: an unset group must not backtrack here. The selected OVECTOR
      address (TMP2) is remembered in POSSESSIVE1 for the loop below. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    /* At least one repeat is required: one zero slot, and an unset group is
    a failure. */
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is the repeat counter; it is only needed when a bound
  greater than one has to be checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of one repetition. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: repeat without saving a position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below the maximum: save the position reached and try once more. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* ---- Minimizing (lazy) repeat ----
Stack slots: STACK(0) is written with zero or with STR_PTR, STACK(1) is the
repeat counter (left unwritten for OP_CRMINSTAR), and for the duplicate-name
forms STACK(2) keeps the selected OVECTOR address. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat,
  is zero the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero repeats are tried first: skip the matching code emitted below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* Code from this label matches one more repetition. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
/* Give up once the counter has reached the upper limit. */
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Count the repetition and keep matching until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
9324
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9325 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9326 {
9327 DEFINE_COMPILER;
9328 backtrack_common *backtrack;
9329 recurse_entry *entry = common->entries;
9330 recurse_entry *prev = NULL;
9331 sljit_sw start = GET(cc, 1);
9332 PCRE2_SPTR start_cc;
9333 BOOL needs_control_head;
9334
9335 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9336
9337 /* Inlining simple patterns. */
9338 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9339 {
9340 start_cc = common->start + start;
9341 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9342 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9343 return cc + 1 + LINK_SIZE;
9344 }
9345
9346 while (entry != NULL)
9347 {
9348 if (entry->start == start)
9349 break;
9350 prev = entry;
9351 entry = entry->next;
9352 }
9353
9354 if (entry == NULL)
9355 {
9356 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9357 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9358 return NULL;
9359 entry->next = NULL;
9360 entry->entry_label = NULL;
9361 entry->backtrack_label = NULL;
9362 entry->entry_calls = NULL;
9363 entry->backtrack_calls = NULL;
9364 entry->start = start;
9365
9366 if (prev != NULL)
9367 prev->next = entry;
9368 else
9369 common->entries = entry;
9370 }
9371
9372 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9373
9374 if (entry->entry_label == NULL)
9375 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9376 else
9377 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9378 /* Leave if the match is failed. */
9379 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9380 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9381 return cc + 1 + LINK_SIZE;
9382 }
9383
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9384 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9385 {
9386 PCRE2_SPTR begin;
9387 PCRE2_SIZE *ovector;
9388 sljit_u32 oveccount, capture_top;
9389
9390 if (arguments->callout == NULL)
9391 return 0;
9392
9393 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9394
9395 begin = arguments->begin;
9396 ovector = (PCRE2_SIZE*)(callout_block + 1);
9397 oveccount = callout_block->capture_top;
9398
9399 SLJIT_ASSERT(oveccount >= 1);
9400
9401 callout_block->version = 2;
9402 callout_block->callout_flags = 0;
9403
9404 /* Offsets in subject. */
9405 callout_block->subject_length = arguments->end - arguments->begin;
9406 callout_block->start_match = jit_ovector[0] - begin;
9407 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9408 callout_block->subject = begin;
9409
9410 /* Convert and copy the JIT offset vector to the ovector array. */
9411 callout_block->capture_top = 1;
9412 callout_block->offset_vector = ovector;
9413
9414 ovector[0] = PCRE2_UNSET;
9415 ovector[1] = PCRE2_UNSET;
9416 ovector += 2;
9417 jit_ovector += 2;
9418 capture_top = 1;
9419
9420 /* Convert pointers to sizes. */
9421 while (--oveccount != 0)
9422 {
9423 capture_top++;
9424
9425 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9426 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9427
9428 if (ovector[0] != PCRE2_UNSET)
9429 callout_block->capture_top = capture_top;
9430
9431 ovector += 2;
9432 jit_ovector += 2;
9433 }
9434
9435 return (arguments->callout)(callout_block, arguments->callout_data);
9436 }
9437
/* Byte offset of the field "arg" inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9440
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9441 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9442 {
9443 DEFINE_COMPILER;
9444 backtrack_common *backtrack;
9445 sljit_s32 mov_opcode;
9446 unsigned int callout_length = (*cc == OP_CALLOUT)
9447 ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9448 sljit_sw value1;
9449 sljit_sw value2;
9450 sljit_sw value3;
9451 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9452
9453 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9454
9455 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9456
9457 allocate_stack(common, callout_arg_size);
9458
9459 SLJIT_ASSERT(common->capture_last_ptr != 0);
9460 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9461 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9462 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9463 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9464 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9465 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9466
9467 /* These pointer sized fields temporarly stores internal variables. */
9468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9469
9470 if (common->mark_ptr != 0)
9471 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9472 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9473 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9474 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9475
9476 if (*cc == OP_CALLOUT)
9477 {
9478 value1 = 0;
9479 value2 = 0;
9480 value3 = 0;
9481 }
9482 else
9483 {
9484 value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9485 value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9486 value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9487 }
9488
9489 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9490 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9491 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9493
9494 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9495
9496 /* Needed to save important temporary registers. */
9497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9498 /* SLJIT_R0 = arguments */
9499 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9500 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9501 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9502 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9503 free_stack(common, callout_arg_size);
9504
9505 /* Check return value. */
9506 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9507 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
9508 if (common->abort_label == NULL)
9509 add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
9510 else
9511 JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
9512 return cc + callout_length;
9513 }
9514
/* The callout helper macros are not needed beyond this point.
NOTE(review): no definition of CALLOUT_ARG_SIZE is visible near the callout
code; if it is not defined anywhere in this file, the first #undef is a
harmless leftover - confirm before removing. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
9517
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9518 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9519 {
9520 while (TRUE)
9521 {
9522 switch (*cc)
9523 {
9524 case OP_CALLOUT_STR:
9525 cc += GET(cc, 1 + 2*LINK_SIZE);
9526 break;
9527
9528 case OP_NOT_WORD_BOUNDARY:
9529 case OP_WORD_BOUNDARY:
9530 case OP_CIRC:
9531 case OP_CIRCM:
9532 case OP_DOLL:
9533 case OP_DOLLM:
9534 case OP_CALLOUT:
9535 case OP_ALT:
9536 cc += PRIV(OP_lengths)[*cc];
9537 break;
9538
9539 case OP_KET:
9540 return FALSE;
9541
9542 default:
9543 return TRUE;
9544 }
9545 }
9546 }
9547
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9548 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9549 {
9550 DEFINE_COMPILER;
9551 int framesize;
9552 int extrasize;
9553 BOOL local_quit_available = FALSE;
9554 BOOL needs_control_head;
9555 int private_data_ptr;
9556 backtrack_common altbacktrack;
9557 PCRE2_SPTR ccbegin;
9558 PCRE2_UCHAR opcode;
9559 PCRE2_UCHAR bra = OP_BRA;
9560 jump_list *tmp = NULL;
9561 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9562 jump_list **found;
9563 /* Saving previous accept variables. */
9564 BOOL save_local_quit_available = common->local_quit_available;
9565 BOOL save_in_positive_assertion = common->in_positive_assertion;
9566 then_trap_backtrack *save_then_trap = common->then_trap;
9567 struct sljit_label *save_quit_label = common->quit_label;
9568 struct sljit_label *save_accept_label = common->accept_label;
9569 jump_list *save_quit = common->quit;
9570 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9571 jump_list *save_accept = common->accept;
9572 struct sljit_jump *jump;
9573 struct sljit_jump *brajump = NULL;
9574
9575 /* Assert captures then. */
9576 common->then_trap = NULL;
9577
9578 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9579 {
9580 SLJIT_ASSERT(!conditional);
9581 bra = *cc;
9582 cc++;
9583 }
9584 private_data_ptr = PRIVATE_DATA(cc);
9585 SLJIT_ASSERT(private_data_ptr != 0);
9586 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9587 backtrack->framesize = framesize;
9588 backtrack->private_data_ptr = private_data_ptr;
9589 opcode = *cc;
9590 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9591 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9592 ccbegin = cc;
9593 cc += GET(cc, 1);
9594
9595 if (bra == OP_BRAMINZERO)
9596 {
9597 /* This is a braminzero backtrack path. */
9598 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9599 free_stack(common, 1);
9600 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9601 }
9602
9603 if (framesize < 0)
9604 {
9605 extrasize = 1;
9606 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9607 extrasize = 0;
9608
9609 if (needs_control_head)
9610 extrasize++;
9611
9612 if (framesize == no_frame)
9613 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9614
9615 if (extrasize > 0)
9616 allocate_stack(common, extrasize);
9617
9618 if (needs_control_head)
9619 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9620
9621 if (extrasize > 0)
9622 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9623
9624 if (needs_control_head)
9625 {
9626 SLJIT_ASSERT(extrasize == 2);
9627 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9628 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9629 }
9630 }
9631 else
9632 {
9633 extrasize = needs_control_head ? 3 : 2;
9634 allocate_stack(common, framesize + extrasize);
9635
9636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9637 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9638 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9639 if (needs_control_head)
9640 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9641 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9642
9643 if (needs_control_head)
9644 {
9645 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9646 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9648 }
9649 else
9650 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9651
9652 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9653 }
9654
9655 memset(&altbacktrack, 0, sizeof(backtrack_common));
9656 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9657 {
9658 /* Control verbs cannot escape from these asserts. */
9659 local_quit_available = TRUE;
9660 common->local_quit_available = TRUE;
9661 common->quit_label = NULL;
9662 common->quit = NULL;
9663 }
9664
9665 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9666 common->positive_assertion_quit = NULL;
9667
9668 while (1)
9669 {
9670 common->accept_label = NULL;
9671 common->accept = NULL;
9672 altbacktrack.top = NULL;
9673 altbacktrack.topbacktracks = NULL;
9674
9675 if (*ccbegin == OP_ALT && extrasize > 0)
9676 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9677
9678 altbacktrack.cc = ccbegin;
9679 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9680 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9681 {
9682 if (local_quit_available)
9683 {
9684 common->local_quit_available = save_local_quit_available;
9685 common->quit_label = save_quit_label;
9686 common->quit = save_quit;
9687 }
9688 common->in_positive_assertion = save_in_positive_assertion;
9689 common->then_trap = save_then_trap;
9690 common->accept_label = save_accept_label;
9691 common->positive_assertion_quit = save_positive_assertion_quit;
9692 common->accept = save_accept;
9693 return NULL;
9694 }
9695 common->accept_label = LABEL();
9696 if (common->accept != NULL)
9697 set_jumps(common->accept, common->accept_label);
9698
9699 /* Reset stack. */
9700 if (framesize < 0)
9701 {
9702 if (framesize == no_frame)
9703 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9704 else if (extrasize > 0)
9705 free_stack(common, extrasize);
9706
9707 if (needs_control_head)
9708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9709 }
9710 else
9711 {
9712 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9713 {
9714 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9715 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9716 if (needs_control_head)
9717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9718 }
9719 else
9720 {
9721 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9722 if (needs_control_head)
9723 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9724 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9725 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9726 }
9727 }
9728
9729 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9730 {
9731 /* We know that STR_PTR was stored on the top of the stack. */
9732 if (conditional)
9733 {
9734 if (extrasize > 0)
9735 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9736 }
9737 else if (bra == OP_BRAZERO)
9738 {
9739 if (framesize < 0)
9740 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9741 else
9742 {
9743 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9744 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9745 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9746 }
9747 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9749 }
9750 else if (framesize >= 0)
9751 {
9752 /* For OP_BRA and OP_BRAMINZERO. */
9753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9754 }
9755 }
9756 add_jump(compiler, found, JUMP(SLJIT_JUMP));
9757
9758 compile_backtrackingpath(common, altbacktrack.top);
9759 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9760 {
9761 if (local_quit_available)
9762 {
9763 common->local_quit_available = save_local_quit_available;
9764 common->quit_label = save_quit_label;
9765 common->quit = save_quit;
9766 }
9767 common->in_positive_assertion = save_in_positive_assertion;
9768 common->then_trap = save_then_trap;
9769 common->accept_label = save_accept_label;
9770 common->positive_assertion_quit = save_positive_assertion_quit;
9771 common->accept = save_accept;
9772 return NULL;
9773 }
9774 set_jumps(altbacktrack.topbacktracks, LABEL());
9775
9776 if (*cc != OP_ALT)
9777 break;
9778
9779 ccbegin = cc;
9780 cc += GET(cc, 1);
9781 }
9782
9783 if (local_quit_available)
9784 {
9785 SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9786 /* Makes the check less complicated below. */
9787 common->positive_assertion_quit = common->quit;
9788 }
9789
9790 /* None of them matched. */
9791 if (common->positive_assertion_quit != NULL)
9792 {
9793 jump = JUMP(SLJIT_JUMP);
9794 set_jumps(common->positive_assertion_quit, LABEL());
9795 SLJIT_ASSERT(framesize != no_stack);
9796 if (framesize < 0)
9797 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9798 else
9799 {
9800 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9801 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9802 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9803 }
9804 JUMPHERE(jump);
9805 }
9806
9807 if (needs_control_head)
9808 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9809
9810 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9811 {
9812 /* Assert is failed. */
9813 if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9814 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9815
9816 if (framesize < 0)
9817 {
9818 /* The topmost item should be 0. */
9819 if (bra == OP_BRAZERO)
9820 {
9821 if (extrasize == 2)
9822 free_stack(common, 1);
9823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9824 }
9825 else if (extrasize > 0)
9826 free_stack(common, extrasize);
9827 }
9828 else
9829 {
9830 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9831 /* The topmost item should be 0. */
9832 if (bra == OP_BRAZERO)
9833 {
9834 free_stack(common, framesize + extrasize - 1);
9835 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9836 }
9837 else
9838 free_stack(common, framesize + extrasize);
9839 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9840 }
9841 jump = JUMP(SLJIT_JUMP);
9842 if (bra != OP_BRAZERO)
9843 add_jump(compiler, target, jump);
9844
9845 /* Assert is successful. */
9846 set_jumps(tmp, LABEL());
9847 if (framesize < 0)
9848 {
9849 /* We know that STR_PTR was stored on the top of the stack. */
9850 if (extrasize > 0)
9851 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9852
9853 /* Keep the STR_PTR on the top of the stack. */
9854 if (bra == OP_BRAZERO)
9855 {
9856 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9857 if (extrasize == 2)
9858 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9859 }
9860 else if (bra == OP_BRAMINZERO)
9861 {
9862 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9863 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9864 }
9865 }
9866 else
9867 {
9868 if (bra == OP_BRA)
9869 {
9870 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9871 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9872 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9873 }
9874 else
9875 {
9876 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9877 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
9878 if (extrasize == 2)
9879 {
9880 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9881 if (bra == OP_BRAMINZERO)
9882 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9883 }
9884 else
9885 {
9886 SLJIT_ASSERT(extrasize == 3);
9887 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9888 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9889 }
9890 }
9891 }
9892
9893 if (bra == OP_BRAZERO)
9894 {
9895 backtrack->matchingpath = LABEL();
9896 SET_LABEL(jump, backtrack->matchingpath);
9897 }
9898 else if (bra == OP_BRAMINZERO)
9899 {
9900 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9901 JUMPHERE(brajump);
9902 if (framesize >= 0)
9903 {
9904 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9905 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9907 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9909 }
9910 set_jumps(backtrack->common.topbacktracks, LABEL());
9911 }
9912 }
9913 else
9914 {
9915 /* AssertNot is successful. */
9916 if (framesize < 0)
9917 {
9918 if (extrasize > 0)
9919 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9920
9921 if (bra != OP_BRA)
9922 {
9923 if (extrasize == 2)
9924 free_stack(common, 1);
9925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9926 }
9927 else if (extrasize > 0)
9928 free_stack(common, extrasize);
9929 }
9930 else
9931 {
9932 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9933 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9934 /* The topmost item should be 0. */
9935 if (bra != OP_BRA)
9936 {
9937 free_stack(common, framesize + extrasize - 1);
9938 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9939 }
9940 else
9941 free_stack(common, framesize + extrasize);
9942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9943 }
9944
9945 if (bra == OP_BRAZERO)
9946 backtrack->matchingpath = LABEL();
9947 else if (bra == OP_BRAMINZERO)
9948 {
9949 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9950 JUMPHERE(brajump);
9951 }
9952
9953 if (bra != OP_BRA)
9954 {
9955 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
9956 set_jumps(backtrack->common.topbacktracks, LABEL());
9957 backtrack->common.topbacktracks = NULL;
9958 }
9959 }
9960
9961 if (local_quit_available)
9962 {
9963 common->local_quit_available = save_local_quit_available;
9964 common->quit_label = save_quit_label;
9965 common->quit = save_quit;
9966 }
9967 common->in_positive_assertion = save_in_positive_assertion;
9968 common->then_trap = save_then_trap;
9969 common->accept_label = save_accept_label;
9970 common->positive_assertion_quit = save_positive_assertion_quit;
9971 common->accept = save_accept;
9972 return cc + 1 + LINK_SIZE;
9973 }
9974
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)9975 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
9976 {
9977 DEFINE_COMPILER;
9978 int stacksize;
9979
9980 if (framesize < 0)
9981 {
9982 if (framesize == no_frame)
9983 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9984 else
9985 {
9986 stacksize = needs_control_head ? 1 : 0;
9987 if (ket != OP_KET || has_alternatives)
9988 stacksize++;
9989
9990 if (stacksize > 0)
9991 free_stack(common, stacksize);
9992 }
9993
9994 if (needs_control_head)
9995 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
9996
9997 /* TMP2 which is set here used by OP_KETRMAX below. */
9998 if (ket == OP_KETRMAX)
9999 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10000 else if (ket == OP_KETRMIN)
10001 {
10002 /* Move the STR_PTR to the private_data_ptr. */
10003 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10004 }
10005 }
10006 else
10007 {
10008 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10009 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10010 if (needs_control_head)
10011 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10012
10013 if (ket == OP_KETRMAX)
10014 {
10015 /* TMP2 which is set here used by OP_KETRMAX below. */
10016 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10017 }
10018 }
10019 if (needs_control_head)
10020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10021 }
10022
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10023 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10024 {
10025 DEFINE_COMPILER;
10026
10027 if (common->capture_last_ptr != 0)
10028 {
10029 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10030 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10031 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10032 stacksize++;
10033 }
10034 if (common->optimized_cbracket[offset >> 1] == 0)
10035 {
10036 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10037 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10038 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10040 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10043 stacksize += 2;
10044 }
10045 return stacksize;
10046 }
10047
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10048 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10049 {
10050 if (PRIV(script_run)(ptr, endptr, FALSE))
10051 return endptr;
10052 return NULL;
10053 }
10054
10055 #ifdef SUPPORT_UNICODE
10056
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10057 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10058 {
10059 if (PRIV(script_run)(ptr, endptr, TRUE))
10060 return endptr;
10061 return NULL;
10062 }
10063
10064 #endif /* SUPPORT_UNICODE */
10065
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10066 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10067 {
10068 DEFINE_COMPILER;
10069
10070 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10071
10072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10073 #ifdef SUPPORT_UNICODE
10074 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10075 common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10076 #else
10077 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10078 #endif
10079
10080 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10081 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10082 }
10083
10084 /*
10085 Handling bracketed expressions is probably the most complex part.
10086
10087 Stack layout naming characters:
10088 S - Push the current STR_PTR
10089 0 - Push a 0 (NULL)
10090 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10091 before the next alternative. Not pushed if there are no alternatives.
10092 M - Any values pushed by the current alternative. Can be empty, or anything.
10093 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10094 L - Push the previous local (pointed by localptr) to the stack
() - optional values stored on the stack
()* - optional, can be stored multiple times
10097
10098 The following list shows the regular expression templates, their PCRE byte codes
10099 and stack layout supported by pcre-sljit.
10100
10101 (?:) OP_BRA | OP_KET A M
10102 () OP_CBRA | OP_KET C M
10103 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10104 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10105 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10106 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10107 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10108 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10109 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10110 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10111 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10112 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10113 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10114 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10115 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10116 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10117 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10118 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10119 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10120 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10121 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10122 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10123
10124
10125 Stack layout naming characters:
10126 A - Push the alternative index (starting from 0) on the stack.
Not pushed if there are no alternatives.
10128 M - Any values pushed by the current alternative. Can be empty, or anything.
10129
10130 The next list shows the possible content of a bracket:
10131 (|) OP_*BRA | OP_ALT ... M A
10132 (?()|) OP_*COND | OP_ALT M A
10133 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10134 Or nothing, if trace is unnecessary
10135 */
10136
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10137 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10138 {
10139 DEFINE_COMPILER;
10140 backtrack_common *backtrack;
10141 PCRE2_UCHAR opcode;
10142 int private_data_ptr = 0;
10143 int offset = 0;
10144 int i, stacksize;
10145 int repeat_ptr = 0, repeat_length = 0;
10146 int repeat_type = 0, repeat_count = 0;
10147 PCRE2_SPTR ccbegin;
10148 PCRE2_SPTR matchingpath;
10149 PCRE2_SPTR slot;
10150 PCRE2_UCHAR bra = OP_BRA;
10151 PCRE2_UCHAR ket;
10152 assert_backtrack *assert;
10153 BOOL has_alternatives;
10154 BOOL needs_control_head = FALSE;
10155 struct sljit_jump *jump;
10156 struct sljit_jump *skip;
10157 struct sljit_label *rmax_label = NULL;
10158 struct sljit_jump *braminzero = NULL;
10159
10160 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10161
10162 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10163 {
10164 bra = *cc;
10165 cc++;
10166 opcode = *cc;
10167 }
10168
10169 opcode = *cc;
10170 ccbegin = cc;
10171 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10172 ket = *matchingpath;
10173 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10174 {
10175 repeat_ptr = PRIVATE_DATA(matchingpath);
10176 repeat_length = PRIVATE_DATA(matchingpath + 1);
10177 repeat_type = PRIVATE_DATA(matchingpath + 2);
10178 repeat_count = PRIVATE_DATA(matchingpath + 3);
10179 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10180 if (repeat_type == OP_UPTO)
10181 ket = OP_KETRMAX;
10182 if (repeat_type == OP_MINUPTO)
10183 ket = OP_KETRMIN;
10184 }
10185
10186 matchingpath = ccbegin + 1 + LINK_SIZE;
10187 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10188 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10189 cc += GET(cc, 1);
10190
10191 has_alternatives = *cc == OP_ALT;
10192 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10193 {
10194 SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10195 compile_time_checks_must_be_grouped_together);
10196 has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10197 }
10198
10199 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10200 opcode = OP_SCOND;
10201
10202 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10203 {
10204 /* Capturing brackets has a pre-allocated space. */
10205 offset = GET2(ccbegin, 1 + LINK_SIZE);
10206 if (common->optimized_cbracket[offset] == 0)
10207 {
10208 private_data_ptr = OVECTOR_PRIV(offset);
10209 offset <<= 1;
10210 }
10211 else
10212 {
10213 offset <<= 1;
10214 private_data_ptr = OVECTOR(offset);
10215 }
10216 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10217 matchingpath += IMM2_SIZE;
10218 }
10219 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10220 {
10221 /* Other brackets simply allocate the next entry. */
10222 private_data_ptr = PRIVATE_DATA(ccbegin);
10223 SLJIT_ASSERT(private_data_ptr != 0);
10224 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10225 if (opcode == OP_ONCE)
10226 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10227 }
10228
10229 /* Instructions before the first alternative. */
10230 stacksize = 0;
10231 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10232 stacksize++;
10233 if (bra == OP_BRAZERO)
10234 stacksize++;
10235
10236 if (stacksize > 0)
10237 allocate_stack(common, stacksize);
10238
10239 stacksize = 0;
10240 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10241 {
10242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10243 stacksize++;
10244 }
10245
10246 if (bra == OP_BRAZERO)
10247 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10248
10249 if (bra == OP_BRAMINZERO)
10250 {
10251 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10252 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10253 if (ket != OP_KETRMIN)
10254 {
10255 free_stack(common, 1);
10256 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10257 }
10258 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10259 {
10260 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10261 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10262 /* Nothing stored during the first run. */
10263 skip = JUMP(SLJIT_JUMP);
10264 JUMPHERE(jump);
10265 /* Checking zero-length iteration. */
10266 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10267 {
10268 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10269 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10270 }
10271 else
10272 {
10273 /* Except when the whole stack frame must be saved. */
10274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10275 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10276 }
10277 JUMPHERE(skip);
10278 }
10279 else
10280 {
10281 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10282 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10283 JUMPHERE(jump);
10284 }
10285 }
10286
10287 if (repeat_type != 0)
10288 {
10289 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10290 if (repeat_type == OP_EXACT)
10291 rmax_label = LABEL();
10292 }
10293
10294 if (ket == OP_KETRMIN)
10295 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10296
10297 if (ket == OP_KETRMAX)
10298 {
10299 rmax_label = LABEL();
10300 if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10301 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10302 }
10303
10304 /* Handling capturing brackets and alternatives. */
10305 if (opcode == OP_ONCE)
10306 {
10307 stacksize = 0;
10308 if (needs_control_head)
10309 {
10310 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10311 stacksize++;
10312 }
10313
10314 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10315 {
10316 /* Neither capturing brackets nor recursions are found in the block. */
10317 if (ket == OP_KETRMIN)
10318 {
10319 stacksize += 2;
10320 if (!needs_control_head)
10321 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10322 }
10323 else
10324 {
10325 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10326 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10327 if (ket == OP_KETRMAX || has_alternatives)
10328 stacksize++;
10329 }
10330
10331 if (stacksize > 0)
10332 allocate_stack(common, stacksize);
10333
10334 stacksize = 0;
10335 if (needs_control_head)
10336 {
10337 stacksize++;
10338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10339 }
10340
10341 if (ket == OP_KETRMIN)
10342 {
10343 if (needs_control_head)
10344 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10346 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10347 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10348 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10349 }
10350 else if (ket == OP_KETRMAX || has_alternatives)
10351 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10352 }
10353 else
10354 {
10355 if (ket != OP_KET || has_alternatives)
10356 stacksize++;
10357
10358 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10359 allocate_stack(common, stacksize);
10360
10361 if (needs_control_head)
10362 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10363
10364 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10365 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10366
10367 stacksize = needs_control_head ? 1 : 0;
10368 if (ket != OP_KET || has_alternatives)
10369 {
10370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10371 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10372 stacksize++;
10373 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10374 }
10375 else
10376 {
10377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10378 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10379 }
10380 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10381 }
10382 }
10383 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10384 {
10385 /* Saving the previous values. */
10386 if (common->optimized_cbracket[offset >> 1] != 0)
10387 {
10388 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10389 allocate_stack(common, 2);
10390 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10391 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10395 }
10396 else
10397 {
10398 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10399 allocate_stack(common, 1);
10400 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10402 }
10403 }
10404 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10405 {
10406 /* Saving the previous value. */
10407 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10408 allocate_stack(common, 1);
10409 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10411 }
10412 else if (has_alternatives)
10413 {
10414 /* Pushing the starting string pointer. */
10415 allocate_stack(common, 1);
10416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10417 }
10418
10419 /* Generating code for the first alternative. */
10420 if (opcode == OP_COND || opcode == OP_SCOND)
10421 {
10422 if (*matchingpath == OP_CREF)
10423 {
10424 SLJIT_ASSERT(has_alternatives);
10425 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10426 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10427 matchingpath += 1 + IMM2_SIZE;
10428 }
10429 else if (*matchingpath == OP_DNCREF)
10430 {
10431 SLJIT_ASSERT(has_alternatives);
10432
10433 i = GET2(matchingpath, 1 + IMM2_SIZE);
10434 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10435 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10436 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10437 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10438 slot += common->name_entry_size;
10439 i--;
10440 while (i-- > 0)
10441 {
10442 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10443 OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10444 slot += common->name_entry_size;
10445 }
10446 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10447 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10448 matchingpath += 1 + 2 * IMM2_SIZE;
10449 }
10450 else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10451 {
10452 /* Never has other case. */
10453 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10454 SLJIT_ASSERT(!has_alternatives);
10455
10456 if (*matchingpath == OP_TRUE)
10457 {
10458 stacksize = 1;
10459 matchingpath++;
10460 }
10461 else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10462 stacksize = 0;
10463 else if (*matchingpath == OP_RREF)
10464 {
10465 stacksize = GET2(matchingpath, 1);
10466 if (common->currententry == NULL)
10467 stacksize = 0;
10468 else if (stacksize == RREF_ANY)
10469 stacksize = 1;
10470 else if (common->currententry->start == 0)
10471 stacksize = stacksize == 0;
10472 else
10473 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10474
10475 if (stacksize != 0)
10476 matchingpath += 1 + IMM2_SIZE;
10477 }
10478 else
10479 {
10480 if (common->currententry == NULL || common->currententry->start == 0)
10481 stacksize = 0;
10482 else
10483 {
10484 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10485 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10486 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10487 while (stacksize > 0)
10488 {
10489 if ((int)GET2(slot, 0) == i)
10490 break;
10491 slot += common->name_entry_size;
10492 stacksize--;
10493 }
10494 }
10495
10496 if (stacksize != 0)
10497 matchingpath += 1 + 2 * IMM2_SIZE;
10498 }
10499
10500 /* The stacksize == 0 is a common "else" case. */
10501 if (stacksize == 0)
10502 {
10503 if (*cc == OP_ALT)
10504 {
10505 matchingpath = cc + 1 + LINK_SIZE;
10506 cc += GET(cc, 1);
10507 }
10508 else
10509 matchingpath = cc;
10510 }
10511 }
10512 else
10513 {
10514 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10515 /* Similar code as PUSH_BACKTRACK macro. */
10516 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10517 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10518 return NULL;
10519 memset(assert, 0, sizeof(assert_backtrack));
10520 assert->common.cc = matchingpath;
10521 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10522 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10523 }
10524 }
10525
10526 compile_matchingpath(common, matchingpath, cc, backtrack);
10527 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10528 return NULL;
10529
10530 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10531 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10532
10533 if (opcode == OP_ONCE)
10534 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10535
10536 if (opcode == OP_SCRIPT_RUN)
10537 match_script_run_common(common, private_data_ptr, backtrack);
10538
10539 stacksize = 0;
10540 if (repeat_type == OP_MINUPTO)
10541 {
10542 /* We need to preserve the counter. TMP2 will be used below. */
10543 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10544 stacksize++;
10545 }
10546 if (ket != OP_KET || bra != OP_BRA)
10547 stacksize++;
10548 if (offset != 0)
10549 {
10550 if (common->capture_last_ptr != 0)
10551 stacksize++;
10552 if (common->optimized_cbracket[offset >> 1] == 0)
10553 stacksize += 2;
10554 }
10555 if (has_alternatives && opcode != OP_ONCE)
10556 stacksize++;
10557
10558 if (stacksize > 0)
10559 allocate_stack(common, stacksize);
10560
10561 stacksize = 0;
10562 if (repeat_type == OP_MINUPTO)
10563 {
10564 /* TMP2 was set above. */
10565 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10566 stacksize++;
10567 }
10568
10569 if (ket != OP_KET || bra != OP_BRA)
10570 {
10571 if (ket != OP_KET)
10572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10573 else
10574 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10575 stacksize++;
10576 }
10577
10578 if (offset != 0)
10579 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10580
10581 /* Skip and count the other alternatives. */
10582 i = 1;
10583 while (*cc == OP_ALT)
10584 {
10585 cc += GET(cc, 1);
10586 i++;
10587 }
10588
10589 if (has_alternatives)
10590 {
10591 if (opcode != OP_ONCE)
10592 {
10593 if (i <= 3)
10594 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10595 else
10596 BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10597 }
10598 if (ket != OP_KETRMAX)
10599 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10600 }
10601
10602 /* Must be after the matchingpath label. */
10603 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10604 {
10605 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10607 }
10608
10609 if (ket == OP_KETRMAX)
10610 {
10611 if (repeat_type != 0)
10612 {
10613 if (has_alternatives)
10614 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10615 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10616 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10617 /* Drop STR_PTR for greedy plus quantifier. */
10618 if (opcode != OP_ONCE)
10619 free_stack(common, 1);
10620 }
10621 else if (opcode < OP_BRA || opcode >= OP_SBRA)
10622 {
10623 if (has_alternatives)
10624 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10625
10626 /* Checking zero-length iteration. */
10627 if (opcode != OP_ONCE)
10628 {
10629 /* This case includes opcodes such as OP_SCRIPT_RUN. */
10630 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10631 /* Drop STR_PTR for greedy plus quantifier. */
10632 if (bra != OP_BRAZERO)
10633 free_stack(common, 1);
10634 }
10635 else
10636 /* TMP2 must contain the starting STR_PTR. */
10637 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10638 }
10639 else
10640 JUMPTO(SLJIT_JUMP, rmax_label);
10641 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10642 }
10643
10644 if (repeat_type == OP_EXACT)
10645 {
10646 count_match(common);
10647 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10648 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10649 }
10650 else if (repeat_type == OP_UPTO)
10651 {
10652 /* We need to preserve the counter. */
10653 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10654 allocate_stack(common, 1);
10655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10656 }
10657
10658 if (bra == OP_BRAZERO)
10659 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10660
10661 if (bra == OP_BRAMINZERO)
10662 {
10663 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10664 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10665 if (braminzero != NULL)
10666 {
10667 JUMPHERE(braminzero);
10668 /* We need to release the end pointer to perform the
10669 backtrack for the zero-length iteration. When
10670 framesize is < 0, OP_ONCE will do the release itself. */
10671 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10672 {
10673 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10674 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10675 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10676 }
10677 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10678 free_stack(common, 1);
10679 }
10680 /* Continue to the normal backtrack. */
10681 }
10682
10683 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10684 count_match(common);
10685
10686 cc += 1 + LINK_SIZE;
10687
10688 if (opcode == OP_ONCE)
10689 {
10690 /* We temporarily encode the needs_control_head in the lowest bit.
10691 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10692 the same value for small signed numbers (including negative numbers). */
10693 BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10694 }
10695 return cc + repeat_length;
10696 }
10697
/* Generates the matching path for the OP_BRAPOS / OP_SBRAPOS / OP_CBRAPOS /
OP_SCBRAPOS bracket forms (optionally preceded by OP_BRAPOSZERO), whose
alternatives are chained by their link fields and closed by OP_KETRPOS.

Arguments:
  common   the JIT compiler state
  cc       points at OP_BRAPOSZERO, or at the bracket opcode itself
  parent   not referenced directly in this body; presumably consumed by the
           PUSH_BACKTRACK macro - TODO confirm against the macro definition

Returns the address just past the closing OP_KETRPOS, or NULL when sljit
reports a compiler error after an alternative has been compiled.

The generated code loops: every alternative that matches jumps back to "loop"
and the group is tried again. The loop is left when all alternatives fail, or,
for the OP_SBRAPOS / OP_SCBRAPOS forms, when an iteration did not move
STR_PTR (the "emptymatch" jumps). */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

/* NOTE(review): "backtrack" and "compiler" are never assigned in this body;
presumably PUSH_BACKTRACK and DEFINE_COMPILER set them up - confirm. */
PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  /* Zero iterations are acceptable, so no "matched at least once" flag
  is needed on the stack (see the !zero cases below). */
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  /* Non-capturing forms: the first alternative follows the link field. */
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  /* Capturing forms: a 2-unit capture number follows the link field. From
  here on "offset" is twice the capture number, i.e. the index of the first
  of the two OVECTOR entries used for this group. */
  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is pushed. Slots allocated here, from STACK(0) upwards:
       capturing:      OVECTOR(offset), OVECTOR(offset + 1),
                       [value at capture_last_ptr]
       non-capturing:  STR_PTR
       [control head]  when needs_control_head
       [flag = 1]      when !zero; always the last slot, STACK(stacksize - 1) */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  /* Remember the stack top so it can be reloaded after each alternative. */
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    /* The loads and stores are interleaved; TMP1 and TMP2 are each reused
    for a second value once their first value has been stored. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag goes one slot above the control head. "stack" is stepped up
  for the flag store and back down again, so it ends up holding the slot
  index of the control head. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame of "framesize" slots is needed. Slots, from STACK(0) upwards:
       [flag = 1]                      when !zero
       [control head]                  when needs_control_head
       [STR_PTR]                       when not capturing
       previous private_data_ptr value
       the frame itself, written by init_frame */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  /* private_data_ptr now holds STACK_TOP plus the size of this allocation;
  the loop below subtracts the same amount to recover STACK_TOP. */
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step back over the saved private data slot (and the STR_PTR slot when
  there is one), so that "stack" is the control head slot index whenever a
  control head was stored. */
  stack -= 1 + (offset == 0);
  }

/* Capturing forms keep the start of the current iteration in cbraprivptr. */
if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
/* One pass per alternative: cc is moved to the end of the alternative
(the next OP_ALT or the final OP_KETRPOS) and ccbegin..cc is compiled. */
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* Code emitted from here up to JUMPTO(loop) runs when the alternative
  matched. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 = start of this iteration. Record the capture as
      [TMP1, STR_PTR] and make STR_PTR the start of the next iteration. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* STACK(0) tracks the position reached by the last successful
      iteration. The previous value is only loaded into TMP1 for
      OP_SBRAPOS, where it feeds the emptymatch comparison below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when this iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* An iteration has matched: clear the flag that was initialised to 1. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      /* Recover STACK_TOP from private_data_ptr, then update the capture
      in the same way as the frameless case above. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP2 = saved private_data_ptr value. The slot at
      STACK(-framesize - 2) relative to it is presumably the STR_PTR slot
      stored during setup - TODO confirm against the STACK() definition. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when this iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    if (!zero)
      {
      /* NOTE(review): this is the framesize >= 0 branch, so only the else
      arm below can be selected; the first arm looks unreachable. */
      if (framesize < 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }

  /* Matched: go round again for another iteration of the group. */
  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* Emit the backtracking code of this alternative. Its failure jumps land
  on the label created below, where STR_PTR is restored before the next
  alternative (or the exit code) runs. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      /* TMP2 is loaded here only because the !zero check after the loop
      reads through it. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

/* Without OP_BRAPOSZERO at least one iteration must have matched: a flag
that is still non-zero means none did, so the jump is queued on this
backtrack's topbacktracks list. */
backtrack->topbacktracks = NULL;
if (!zero)
  {
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
/* The empty-iteration exits of the OP_S* forms continue from here too. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
10980
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)10981 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
10982 {
10983 int class_len;
10984
10985 *opcode = *cc;
10986 *exact = 0;
10987
10988 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
10989 {
10990 cc++;
10991 *type = OP_CHAR;
10992 }
10993 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
10994 {
10995 cc++;
10996 *type = OP_CHARI;
10997 *opcode -= OP_STARI - OP_STAR;
10998 }
10999 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11000 {
11001 cc++;
11002 *type = OP_NOT;
11003 *opcode -= OP_NOTSTAR - OP_STAR;
11004 }
11005 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11006 {
11007 cc++;
11008 *type = OP_NOTI;
11009 *opcode -= OP_NOTSTARI - OP_STAR;
11010 }
11011 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11012 {
11013 cc++;
11014 *opcode -= OP_TYPESTAR - OP_STAR;
11015 *type = OP_END;
11016 }
11017 else
11018 {
11019 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11020 *type = *opcode;
11021 cc++;
11022 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11023 *opcode = cc[class_len - 1];
11024
11025 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11026 {
11027 *opcode -= OP_CRSTAR - OP_STAR;
11028 *end = cc + class_len;
11029
11030 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11031 {
11032 *exact = 1;
11033 *opcode -= OP_PLUS - OP_STAR;
11034 }
11035 }
11036 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11037 {
11038 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11039 *end = cc + class_len;
11040
11041 if (*opcode == OP_POSPLUS)
11042 {
11043 *exact = 1;
11044 *opcode = OP_POSSTAR;
11045 }
11046 }
11047 else
11048 {
11049 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11050 *max = GET2(cc, (class_len + IMM2_SIZE));
11051 *exact = GET2(cc, class_len);
11052
11053 if (*max == 0)
11054 {
11055 if (*opcode == OP_CRPOSRANGE)
11056 *opcode = OP_POSSTAR;
11057 else
11058 *opcode -= OP_CRRANGE - OP_STAR;
11059 }
11060 else
11061 {
11062 *max -= *exact;
11063 if (*max == 0)
11064 *opcode = OP_EXACT;
11065 else if (*max == 1)
11066 {
11067 if (*opcode == OP_CRPOSRANGE)
11068 *opcode = OP_POSQUERY;
11069 else
11070 *opcode -= OP_CRRANGE - OP_QUERY;
11071 }
11072 else
11073 {
11074 if (*opcode == OP_CRPOSRANGE)
11075 *opcode = OP_POSUPTO;
11076 else
11077 *opcode -= OP_CRRANGE - OP_UPTO;
11078 }
11079 }
11080 *end = cc + class_len + 2 * IMM2_SIZE;
11081 }
11082 return cc;
11083 }
11084
11085 switch(*opcode)
11086 {
11087 case OP_EXACT:
11088 *exact = GET2(cc, 0);
11089 cc += IMM2_SIZE;
11090 break;
11091
11092 case OP_PLUS:
11093 case OP_MINPLUS:
11094 *exact = 1;
11095 *opcode -= OP_PLUS - OP_STAR;
11096 break;
11097
11098 case OP_POSPLUS:
11099 *exact = 1;
11100 *opcode = OP_POSSTAR;
11101 break;
11102
11103 case OP_UPTO:
11104 case OP_MINUPTO:
11105 case OP_POSUPTO:
11106 *max = GET2(cc, 0);
11107 cc += IMM2_SIZE;
11108 break;
11109 }
11110
11111 if (*type == OP_END)
11112 {
11113 *type = *cc;
11114 *end = next_opcode(common, cc);
11115 cc++;
11116 return cc;
11117 }
11118
11119 *end = cc + 1;
11120 #ifdef SUPPORT_UNICODE
11121 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11122 #endif
11123 return cc;
11124 }
11125
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11126 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11127 {
11128 DEFINE_COMPILER;
11129 backtrack_common *backtrack;
11130 PCRE2_UCHAR opcode;
11131 PCRE2_UCHAR type;
11132 sljit_u32 max = 0, exact;
11133 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11134 sljit_s32 early_fail_type;
11135 BOOL charpos_enabled;
11136 PCRE2_UCHAR charpos_char;
11137 unsigned int charpos_othercasebit;
11138 PCRE2_SPTR end;
11139 jump_list *no_match = NULL;
11140 jump_list *no_char1_match = NULL;
11141 struct sljit_jump *jump = NULL;
11142 struct sljit_label *label;
11143 int private_data_ptr = PRIVATE_DATA(cc);
11144 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11145 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11146 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11147 int tmp_base, tmp_offset;
11148 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11149 BOOL use_tmp;
11150 #endif
11151
11152 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11153
11154 early_fail_type = (early_fail_ptr & 0x7);
11155 early_fail_ptr >>= 3;
11156
11157 /* During recursion, these optimizations are disabled. */
11158 if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11159 {
11160 early_fail_ptr = 0;
11161 early_fail_type = type_skip;
11162 }
11163
11164 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11165 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11166
11167 if (early_fail_type == type_fail)
11168 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11169
11170 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11171
11172 if (type != OP_EXTUNI)
11173 {
11174 tmp_base = TMP3;
11175 tmp_offset = 0;
11176 }
11177 else
11178 {
11179 tmp_base = SLJIT_MEM1(SLJIT_SP);
11180 tmp_offset = POSSESSIVE0;
11181 }
11182
11183 /* Handle fixed part first. */
11184 if (exact > 1)
11185 {
11186 SLJIT_ASSERT(early_fail_ptr == 0);
11187
11188 if (common->mode == PCRE2_JIT_COMPLETE
11189 #ifdef SUPPORT_UNICODE
11190 && !common->utf
11191 #endif
11192 && type != OP_ANYNL && type != OP_EXTUNI)
11193 {
11194 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11195 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11196 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11197 label = LABEL();
11198 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11199 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11200 JUMPTO(SLJIT_NOT_ZERO, label);
11201 }
11202 else
11203 {
11204 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11205 label = LABEL();
11206 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11207 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11208 JUMPTO(SLJIT_NOT_ZERO, label);
11209 }
11210 }
11211 else if (exact == 1)
11212 {
11213 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11214
11215 if (early_fail_type == type_fail_range)
11216 {
11217 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11219 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11220 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11221 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11222
11223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11224 }
11225 }
11226
11227 switch(opcode)
11228 {
11229 case OP_STAR:
11230 case OP_UPTO:
11231 SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11232
11233 if (type == OP_ANYNL || type == OP_EXTUNI)
11234 {
11235 SLJIT_ASSERT(private_data_ptr == 0);
11236 SLJIT_ASSERT(early_fail_ptr == 0);
11237
11238 allocate_stack(common, 2);
11239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11241
11242 if (opcode == OP_UPTO)
11243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11244
11245 label = LABEL();
11246 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11247 if (opcode == OP_UPTO)
11248 {
11249 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11250 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11251 jump = JUMP(SLJIT_ZERO);
11252 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11253 }
11254
11255 /* We cannot use TMP3 because of allocate_stack. */
11256 allocate_stack(common, 1);
11257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11258 JUMPTO(SLJIT_JUMP, label);
11259 if (jump != NULL)
11260 JUMPHERE(jump);
11261 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11262 break;
11263 }
11264 #ifdef SUPPORT_UNICODE
11265 else if (type == OP_ALLANY && !common->invalid_utf)
11266 #else
11267 else if (type == OP_ALLANY)
11268 #endif
11269 {
11270 if (opcode == OP_STAR)
11271 {
11272 if (private_data_ptr == 0)
11273 allocate_stack(common, 2);
11274
11275 OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11276 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11277
11278 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11279 process_partial_match(common);
11280
11281 if (early_fail_ptr != 0)
11282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11283 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11284 break;
11285 }
11286 #ifdef SUPPORT_UNICODE
11287 else if (!common->utf)
11288 #else
11289 else
11290 #endif
11291 {
11292 if (private_data_ptr == 0)
11293 allocate_stack(common, 2);
11294
11295 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11296 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11297
11298 if (common->mode == PCRE2_JIT_COMPLETE)
11299 {
11300 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11301 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11302 }
11303 else
11304 {
11305 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11306 process_partial_match(common);
11307 JUMPHERE(jump);
11308 }
11309
11310 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11311
11312 if (early_fail_ptr != 0)
11313 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11314 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11315 break;
11316 }
11317 }
11318
11319 charpos_enabled = FALSE;
11320 charpos_char = 0;
11321 charpos_othercasebit = 0;
11322
11323 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11324 {
11325 #ifdef SUPPORT_UNICODE
11326 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11327 #else
11328 charpos_enabled = TRUE;
11329 #endif
11330 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11331 {
11332 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11333 if (charpos_othercasebit == 0)
11334 charpos_enabled = FALSE;
11335 }
11336
11337 if (charpos_enabled)
11338 {
11339 charpos_char = end[1];
11340 /* Consume the OP_CHAR opcode. */
11341 end += 2;
11342 #if PCRE2_CODE_UNIT_WIDTH == 8
11343 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11344 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11345 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11346 if ((charpos_othercasebit & 0x100) != 0)
11347 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11348 #endif
11349 if (charpos_othercasebit != 0)
11350 charpos_char |= charpos_othercasebit;
11351
11352 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11353 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11354 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11355 }
11356 }
11357
11358 if (charpos_enabled)
11359 {
11360 if (opcode == OP_UPTO)
11361 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11362
11363 /* Search the first instance of charpos_char. */
11364 jump = JUMP(SLJIT_JUMP);
11365 label = LABEL();
11366 if (opcode == OP_UPTO)
11367 {
11368 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11369 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11370 }
11371 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11372 if (early_fail_ptr != 0)
11373 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11374 JUMPHERE(jump);
11375
11376 detect_partial_match(common, &backtrack->topbacktracks);
11377 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11378 if (charpos_othercasebit != 0)
11379 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11380 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11381
11382 if (private_data_ptr == 0)
11383 allocate_stack(common, 2);
11384 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11385 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11386
11387 if (opcode == OP_UPTO)
11388 {
11389 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11390 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11391 }
11392
11393 /* Search the last instance of charpos_char. */
11394 label = LABEL();
11395 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11396 if (early_fail_ptr != 0)
11397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11398 detect_partial_match(common, &no_match);
11399 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11400 if (charpos_othercasebit != 0)
11401 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11402
11403 if (opcode == OP_STAR)
11404 {
11405 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11406 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11407 JUMPTO(SLJIT_JUMP, label);
11408 }
11409 else
11410 {
11411 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11412 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11413 JUMPHERE(jump);
11414 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11415 JUMPTO(SLJIT_NOT_ZERO, label);
11416 }
11417
11418 set_jumps(no_match, LABEL());
11419 OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11420 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11421 }
11422 else
11423 {
11424 if (private_data_ptr == 0)
11425 allocate_stack(common, 2);
11426
11427 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11428 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11429 use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11430 SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11431
11432 if (common->utf)
11433 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11434 #endif
11435 if (opcode == OP_UPTO)
11436 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11437
11438 detect_partial_match(common, &no_match);
11439 label = LABEL();
11440 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11441 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11442 if (common->utf)
11443 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11444 #endif
11445
11446 if (opcode == OP_UPTO)
11447 {
11448 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11449 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11450 }
11451
11452 detect_partial_match_to(common, label);
11453 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11454
11455 set_jumps(no_char1_match, LABEL());
11456 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11457 if (common->utf)
11458 {
11459 set_jumps(no_match, LABEL());
11460 if (use_tmp)
11461 {
11462 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11463 OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11464 }
11465 else
11466 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11467 }
11468 else
11469 #endif
11470 {
11471 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11472 set_jumps(no_match, LABEL());
11473 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11474 }
11475
11476 if (early_fail_ptr != 0)
11477 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11478 }
11479
11480 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11481 break;
11482
11483 case OP_MINSTAR:
11484 if (private_data_ptr == 0)
11485 allocate_stack(common, 1);
11486 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11487 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11488 if (early_fail_ptr != 0)
11489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11490 break;
11491
11492 case OP_MINUPTO:
11493 SLJIT_ASSERT(early_fail_ptr == 0);
11494 if (private_data_ptr == 0)
11495 allocate_stack(common, 2);
11496 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11497 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11498 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11499 break;
11500
11501 case OP_QUERY:
11502 case OP_MINQUERY:
11503 SLJIT_ASSERT(early_fail_ptr == 0);
11504 if (private_data_ptr == 0)
11505 allocate_stack(common, 1);
11506 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11507 if (opcode == OP_QUERY)
11508 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11509 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11510 break;
11511
11512 case OP_EXACT:
11513 break;
11514
11515 case OP_POSSTAR:
11516 #if defined SUPPORT_UNICODE
11517 if (type == OP_ALLANY && !common->invalid_utf)
11518 #else
11519 if (type == OP_ALLANY)
11520 #endif
11521 {
11522 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11523 process_partial_match(common);
11524 if (early_fail_ptr != 0)
11525 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11526 break;
11527 }
11528
11529 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11530 if (common->utf)
11531 {
11532 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11533 detect_partial_match(common, &no_match);
11534 label = LABEL();
11535 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11536 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11537 detect_partial_match_to(common, label);
11538
11539 set_jumps(no_match, LABEL());
11540 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11541 if (early_fail_ptr != 0)
11542 {
11543 if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11544 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11545 else
11546 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11547 }
11548 break;
11549 }
11550 #endif
11551
11552 detect_partial_match(common, &no_match);
11553 label = LABEL();
11554 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11555 detect_partial_match_to(common, label);
11556 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11557
11558 set_jumps(no_char1_match, LABEL());
11559 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11560 set_jumps(no_match, LABEL());
11561 if (early_fail_ptr != 0)
11562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11563 break;
11564
11565 case OP_POSUPTO:
11566 SLJIT_ASSERT(early_fail_ptr == 0);
11567 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11568 if (common->utf)
11569 {
11570 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11571 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11572
11573 detect_partial_match(common, &no_match);
11574 label = LABEL();
11575 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11576 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11577 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11578 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11579 detect_partial_match_to(common, label);
11580
11581 set_jumps(no_match, LABEL());
11582 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11583 break;
11584 }
11585 #endif
11586
11587 if (type == OP_ALLANY)
11588 {
11589 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11590
11591 if (common->mode == PCRE2_JIT_COMPLETE)
11592 {
11593 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11594 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11595 }
11596 else
11597 {
11598 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11599 process_partial_match(common);
11600 JUMPHERE(jump);
11601 }
11602 break;
11603 }
11604
11605 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11606
11607 detect_partial_match(common, &no_match);
11608 label = LABEL();
11609 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11610 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11611 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11612 detect_partial_match_to(common, label);
11613 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11614
11615 set_jumps(no_char1_match, LABEL());
11616 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11617 set_jumps(no_match, LABEL());
11618 break;
11619
11620 case OP_POSQUERY:
11621 SLJIT_ASSERT(early_fail_ptr == 0);
11622 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11623 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11624 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11625 set_jumps(no_match, LABEL());
11626 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11627 break;
11628
11629 default:
11630 SLJIT_UNREACHABLE();
11631 break;
11632 }
11633
11634 count_match(common);
11635 return end;
11636 }
11637
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11638 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11639 {
11640 DEFINE_COMPILER;
11641 backtrack_common *backtrack;
11642
11643 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11644
11645 if (*cc == OP_FAIL)
11646 {
11647 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11648 return cc + 1;
11649 }
11650
11651 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11652 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11653
11654 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11655 {
11656 /* No need to check notempty conditions. */
11657 if (common->accept_label == NULL)
11658 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11659 else
11660 JUMPTO(SLJIT_JUMP, common->accept_label);
11661 return cc + 1;
11662 }
11663
11664 if (common->accept_label == NULL)
11665 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11666 else
11667 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11668
11669 if (HAS_VIRTUAL_REGISTERS)
11670 {
11671 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11672 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11673 }
11674 else
11675 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11676
11677 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11678 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11679 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11680 if (common->accept_label == NULL)
11681 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11682 else
11683 JUMPTO(SLJIT_ZERO, common->accept_label);
11684
11685 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11686 if (common->accept_label == NULL)
11687 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11688 else
11689 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11690 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11691 return cc + 1;
11692 }
11693
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11694 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11695 {
11696 DEFINE_COMPILER;
11697 int offset = GET2(cc, 1);
11698 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11699
11700 /* Data will be discarded anyway... */
11701 if (common->currententry != NULL)
11702 return cc + 1 + IMM2_SIZE;
11703
11704 if (!optimized_cbracket)
11705 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11706 offset <<= 1;
11707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11708 if (!optimized_cbracket)
11709 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11710 return cc + 1 + IMM2_SIZE;
11711 }
11712
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11713 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11714 {
11715 DEFINE_COMPILER;
11716 backtrack_common *backtrack;
11717 PCRE2_UCHAR opcode = *cc;
11718 PCRE2_SPTR ccend = cc + 1;
11719
11720 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11721 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11722 ccend += 2 + cc[1];
11723
11724 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11725
11726 if (opcode == OP_SKIP)
11727 {
11728 allocate_stack(common, 1);
11729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11730 return ccend;
11731 }
11732
11733 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11734 {
11735 if (HAS_VIRTUAL_REGISTERS)
11736 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11737 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11739 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11740 }
11741
11742 return ccend;
11743 }
11744
/* One-element opcode buffer containing OP_THEN_TRAP. The then-trap backtrack
records created in compile_then_trap_matchingpath() and at the end of
compile_matchingpath() point their common.cc field at this buffer instead of
at a position inside the compiled pattern. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11746
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11747 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11748 {
11749 DEFINE_COMPILER;
11750 backtrack_common *backtrack;
11751 BOOL needs_control_head;
11752 int size;
11753
11754 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11755 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11756 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11757 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11758 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11759
11760 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11761 size = 3 + (size < 0 ? 0 : size);
11762
11763 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11764 allocate_stack(common, size);
11765 if (size > 3)
11766 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11767 else
11768 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11769 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11771 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11772
11773 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11774 if (size >= 0)
11775 init_frame(common, cc, ccend, size - 1, 0);
11776 }
11777
/* Generates the matching path for the opcode sequence starting at cc and
ending at ccend.

Each opcode is dispatched to the emitter responsible for it; every emitter
returns the address of the next opcode to process. Emitters that keep their
own backtrack record receive `parent`, the simple single-item matchers receive
a jump list instead (see the note at the first such call below).

If the pattern uses (*THEN) and common->then_offsets is non-zero for the start
of this sequence, the sequence is bracketed by two then-trap backtrack records:
one pushed before the loop and one after it. common->then_trap is restored to
its previous value at the end.

The function returns early, without completing the sequence, when an emitter
returns NULL or when one of the PUSH_BACKTRACK_NOVALUE macros bails out
(NOTE(review): both presumably indicate an sljit compiler error - confirm
against the macro and helper definitions). */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  BOOL has_then_trap = FALSE;
  then_trap_backtrack *save_then_trap = NULL;

  /* The sequence must stop at the end of the pattern or at an opcode in the
  OP_ALT .. OP_KETRPOS range. */
  SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

  if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
    SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
    has_then_trap = TRUE;
    save_then_trap = common->then_trap;
    /* Tail item on backtrack. */
    compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

  while (cc < ccend)
  {
    switch(*cc)
    {
      /* Simple assertions. The jump list passed is the nextbacktracks list of
      parent->top when parent->top is set, otherwise parent's own
      topbacktracks list; the same selection is used for every matcher below
      that takes a jump list rather than `parent`. */
      case OP_SOD:
      case OP_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_REVERSE:
      cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      break;

      /* Single character type / negated character matchers. */
      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOTPROP:
      case OP_PROP:
      case OP_ANYNL:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      case OP_EXTUNI:
      case OP_NOT:
      case OP_NOTI:
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
      break;

      /* The previous OVECTOR(0) value is saved in one stack slot and
      OVECTOR(0) is overwritten with the current STR_PTR. */
      case OP_SET_SOM:
      PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      cc++;
      break;

      /* Literal characters: the multi-character emitter is used only in
      PCRE2_JIT_COMPLETE mode; other modes emit one character at a time. */
      case OP_CHAR:
      case OP_CHARI:
      if (common->mode == PCRE2_JIT_COMPLETE)
        cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      else
        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
      break;

      /* Repeated single items (character, negated character and character
      type iterators). */
      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_QUERY:
      case OP_MINQUERY:
      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSSTAR:
      case OP_POSPLUS:
      case OP_POSQUERY:
      case OP_POSUPTO:
      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSSTARI:
      case OP_POSPLUSI:
      case OP_POSQUERYI:
      case OP_POSUPTOI:
      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:
      case OP_NOTPOSQUERY:
      case OP_NOTPOSUPTO:
      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      case OP_NOTPOSQUERYI:
      case OP_NOTPOSUPTOI:
      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      case OP_TYPEPOSQUERY:
      case OP_TYPEPOSUPTO:
      cc = compile_iterator_matchingpath(common, cc, parent);
      break;

      /* Character classes: the opcode located after the 32-byte class data
      decides between the iterator emitter (an OP_CRSTAR .. OP_CRPOSRANGE
      repeat follows) and the single-item emitter. */
      case OP_CLASS:
      case OP_NCLASS:
      if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
        cc = compile_iterator_matchingpath(common, cc, parent);
      else
        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
      break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
      /* Extended classes: same decision, with the repeat opcode found at the
      offset stored after the OP_XCLASS opcode (GET(cc, 1)). */
      case OP_XCLASS:
      if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
        cc = compile_iterator_matchingpath(common, cc, parent);
      else
        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
      break;
#endif

      /* Back references, either repeated (iterator emitter) or single. */
      case OP_REF:
      case OP_REFI:
      if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
        cc = compile_ref_iterator_matchingpath(common, cc, parent);
      else
      {
        compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
        cc += 1 + IMM2_SIZE;
      }
      break;

      /* OP_DNREF / OP_DNREFI carry two IMM2 operands; the non-repeated form
      runs compile_dnref_search() before the reference matcher. */
      case OP_DNREF:
      case OP_DNREFI:
      if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
        cc = compile_ref_iterator_matchingpath(common, cc, parent);
      else
      {
        compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
        compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
        cc += 1 + 2 * IMM2_SIZE;
      }
      break;

      case OP_RECURSE:
      cc = compile_recurse_matchingpath(common, cc, parent);
      break;

      case OP_CALLOUT:
      case OP_CALLOUT_STR:
      cc = compile_callout_matchingpath(common, cc, parent);
      break;

      /* Assertions get an assert_backtrack record of their own. */
      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      break;

      /* OP_BRAMINZERO: no code is emitted for the bracket here; cc is moved
      past the whole bracket and STR_PTR is saved on the stack. When the
      bracket ends with OP_KETRMIN two slots are used: a zero in STACK(0) and
      STR_PTR in STACK(1). The label recorded in matchingpath marks the point
      right after these stores. */
      case OP_BRAMINZERO:
      PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
      cc = bracketend(cc + 1);
      if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
        allocate_stack(common, 1);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
      else
      {
        allocate_stack(common, 2);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
      BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
      count_match(common);
      break;

      /* All ordinary bracket forms. */
      case OP_ASSERT_NA:
      case OP_ASSERTBACK_NA:
      case OP_ONCE:
      case OP_SCRIPT_RUN:
      case OP_BRA:
      case OP_CBRA:
      case OP_COND:
      case OP_SBRA:
      case OP_SCBRA:
      case OP_SCOND:
      cc = compile_bracket_matchingpath(common, cc, parent);
      break;

      /* OP_BRAZERO: the following opcode selects the emitter. Opcodes above
      OP_ASSERTBACK_NOT go to the bracket emitter, the rest to the assertion
      emitter. */
      case OP_BRAZERO:
      if (cc[1] > OP_ASSERTBACK_NOT)
        cc = compile_bracket_matchingpath(common, cc, parent);
      else
      {
        PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
        cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
      break;

      /* Possessive brackets. */
      case OP_BRAPOS:
      case OP_CBRAPOS:
      case OP_SBRAPOS:
      case OP_SCBRAPOS:
      case OP_BRAPOSZERO:
      cc = compile_bracketpos_matchingpath(common, cc, parent);
      break;

      /* OP_MARK: the previous mark pointer is saved on the stack, then the
      address cc + 2 is stored both in the frame slot common->mark_ptr and in
      jit_arguments.mark_ptr. When the pattern has a (*SKIP:name), five slots
      are allocated instead of one and a type_mark entry (previous control
      head, tag, cc + 2, STR_PTR) is linked in as the new control head. */
      case OP_MARK:
      PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
      SLJIT_ASSERT(common->mark_ptr != 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      allocate_stack(common, common->has_skip_arg ? 5 : 1);
      if (HAS_VIRTUAL_REGISTERS)
        OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
      if (common->has_skip_arg)
      {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
      /* Skip the opcode, the length code unit and the name data. */
      cc += 1 + 2 + cc[1];
      break;

      /* Backtracking control verbs. */
      case OP_PRUNE:
      case OP_PRUNE_ARG:
      case OP_SKIP:
      case OP_SKIP_ARG:
      case OP_THEN:
      case OP_THEN_ARG:
      case OP_COMMIT:
      case OP_COMMIT_ARG:
      cc = compile_control_verb_matchingpath(common, cc, parent);
      break;

      case OP_FAIL:
      case OP_ACCEPT:
      case OP_ASSERT_ACCEPT:
      cc = compile_fail_accept_matchingpath(common, cc, parent);
      break;

      case OP_CLOSE:
      cc = compile_close_matchingpath(common, cc);
      break;

      /* OP_SKIPZERO: the following bracket is skipped without emitting
      anything for it. */
      case OP_SKIPZERO:
      cc = bracketend(cc + 1);
      break;

      default:
      SLJIT_UNREACHABLE();
      return;
    }
    /* An emitter returned NULL: stop generating code for this sequence. */
    if (cc == NULL)
      return;
  }

  if (has_then_trap)
  {
    /* Head item on backtrack. */
    PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
    BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
    BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
    common->then_trap = save_then_trap;
  }
  SLJIT_ASSERT(cc == ccend);
}
12093
/* The helper macros used by the matching-path emitters are removed here; the
functions that follow generate the backtracking paths and use the two macros
defined below instead. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Calls compile_backtrackingpath() for `current` and executes a plain
`return;` in the calling function if the sljit compiler reports an error
afterwards. It can therefore only be used inside functions returning void that
have `common` and `compiler` in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named `current` in the calling function to a pointer to
the given backtrack structure type. */
#define CURRENT_AS(type) ((type *)current)
12108
/* Generates the backtracking path of a single-item iterator (the counterpart
of compile_iterator_matchingpath).

The iterator's opcode, item type and limits are decoded again with
get_iterator_parameters(). The state saved by the matching path is read either
from the backtrack stack (when the iterator has no private data slot) or from
the frame slot(s) starting at private_data_ptr.

Whenever another attempt is possible, the emitted code jumps back to the
matchingpath label stored in the char_iterator_backtrack record. Once the
iterator is exhausted, any stack slots it owns are released and control falls
through to the end, where the record's topbacktracks jumps are bound. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
  DEFINE_COMPILER;
  PCRE2_SPTR cc = current->cc;
  PCRE2_UCHAR opcode;
  PCRE2_UCHAR type;
  sljit_u32 max = 0, exact;
  struct sljit_label *label = NULL;
  struct sljit_jump *jump = NULL;
  jump_list *jumplist = NULL;
  PCRE2_SPTR end;
  int private_data_ptr = PRIVATE_DATA(cc);
  /* Location of the two state words: STACK(0)/STACK(1) relative to STACK_TOP
  when there is no private data, otherwise two consecutive words in the frame
  starting at private_data_ptr. */
  int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
  int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
  int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

  cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

  switch(opcode)
  {
    case OP_STAR:
    case OP_UPTO:
    if (type == OP_ANYNL || type == OP_EXTUNI)
    {
      /* These item types keep a list of saved positions on the stack: pop one
      value into STR_PTR and resume the matching path unless it is zero. */
      SLJIT_ASSERT(private_data_ptr == 0);
      set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    }
    else
    {
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
      {
        /* The character recorded in u.charpos is searched for while stepping
        back: offset0 holds the current position, offset1 the position at
        which stepping back stops. */
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
        OP1(SLJIT_MOV, TMP2, 0, base, offset1);
        OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
        label = LABEL();
        /* Load the code unit just before STR_PTR and save the position. */
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
        OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
        if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
          OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
        /* Resume matching when the expected character is found, otherwise
        step back again while STR_PTR is above the stop position. */
        CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
        move_back(common, NULL, TRUE);
        CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
      }
      else
      {
        /* Plain case: if the saved position is still above the stop position,
        step back once, save the new position and retry the matching path. */
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
        move_back(common, NULL, TRUE);
        OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
        JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      }
      /* Exhausted: release the two state slots if they are on the stack. */
      JUMPHERE(jump);
      if (private_data_ptr == 0)
        free_stack(common, 2);
    }
    break;

    case OP_MINSTAR:
    /* Restore the saved position, try to match one more item and, on success,
    save the new position and resume the matching path. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    set_jumps(jumplist, LABEL());
    if (private_data_ptr == 0)
      free_stack(common, 1);
    break;

    case OP_MINUPTO:
    /* As OP_MINSTAR, with a counter in offset1 that is decremented before
    each extra item; the iterator is exhausted when it reaches zero. */
    OP1(SLJIT_MOV, TMP1, 0, base, offset1);
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

    set_jumps(jumplist, LABEL());
    if (private_data_ptr == 0)
      free_stack(common, 2);
    break;

    case OP_QUERY:
    /* Two entry points share this code. Falling in from the top handles a
    failure after the item matched; the u.backtracks jumps arrive when the
    item itself failed to match. Both reload the saved position, clear the
    slot and retry the matching path; the first entry does so only if the
    reloaded value is non-zero. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    jump = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 1);
    break;

    case OP_MINQUERY:
    /* The saved position is reloaded and the slot cleared; a zero value means
    the optional item was already tried. Otherwise the item is matched now and
    the matching path is resumed. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
    jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    set_jumps(jumplist, LABEL());
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 1);
    break;

    /* Exact and possessive repeats emit no backtracking code of their own. */
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSQUERY:
    case OP_POSUPTO:
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
  }

  set_jumps(current->topbacktracks, LABEL());
}
12236
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12237 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12238 {
12239 DEFINE_COMPILER;
12240 PCRE2_SPTR cc = current->cc;
12241 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12242 PCRE2_UCHAR type;
12243
12244 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12245
12246 if ((type & 0x1) == 0)
12247 {
12248 /* Maximize case. */
12249 set_jumps(current->topbacktracks, LABEL());
12250 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12251 free_stack(common, 1);
12252 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12253 return;
12254 }
12255
12256 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12257 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12258 set_jumps(current->topbacktracks, LABEL());
12259 free_stack(common, ref ? 2 : 3);
12260 }
12261
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12262 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12263 {
12264 DEFINE_COMPILER;
12265 recurse_entry *entry;
12266
12267 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12268 {
12269 entry = CURRENT_AS(recurse_backtrack)->entry;
12270 if (entry->backtrack_label == NULL)
12271 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12272 else
12273 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12274 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12275 }
12276 else
12277 compile_backtrackingpath(common, current->top);
12278
12279 set_jumps(current->topbacktracks, LABEL());
12280 }
12281
/* Generates the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is never expected here).

With OP_BRAZERO the value saved in STACK(0) is loaded into STR_PTR first; a
non-zero value means the assertion can be retried through the record's
matchingpath label after the slot has been cleared.

  framesize < 0    no frame to revert: bind the topbacktracks jumps and, for
                   OP_BRAZERO, clear the slot, retry if STR_PTR is non-zero
                   and release the slot.
  framesize >= 0   negative assertions preceded by OP_BRAZERO are handled like
                   the case above but without binding topbacktracks. Positive
                   assertions (OP_ASSERT / OP_ASSERTBACK) restore STACK_TOP
                   from the private data slot, call the revertframes routine
                   and restore the previous private data value before the
                   topbacktracks jumps are bound. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
  DEFINE_COMPILER;
  assert_backtrack *info = CURRENT_AS(assert_backtrack);
  PCRE2_SPTR cc = current->cc;
  struct sljit_jump *brajump = NULL;
  BOOL is_brazero;

  SLJIT_ASSERT(*cc != OP_BRAMINZERO);
  is_brazero = (*cc == OP_BRAZERO);
  if (is_brazero)
  {
    cc++;
    SLJIT_ASSERT(current->topbacktracks == NULL);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

  if (info->framesize < 0)
  {
    set_jumps(current->topbacktracks, LABEL());
    if (!is_brazero)
      return;

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, info->matchingpath);
    free_stack(common, 1);
    return;
  }

  if (is_brazero)
  {
    if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, info->matchingpath);
      free_stack(common, 1);
      return;
    }
    /* Positive assertion: drop the slot now; a zero STR_PTR skips the retry
    sequence emitted at the end. */
    free_stack(common, 1);
    brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

  if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), info->private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (info->framesize - 1) * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), info->private_data_ptr, TMP1, 0);
  }
  set_jumps(current->topbacktracks, LABEL());

  if (is_brazero)
  {
    /* We know there is enough place on the stack. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    JUMPTO(SLJIT_JUMP, info->matchingpath);
    JUMPHERE(brajump);
  }
}
12350
/* Generate the backtracking path of a bracket group.

The opcodes that compile_backtrackingpath() routes here are OP_BRA, OP_CBRA,
OP_SBRA, OP_SCBRA, OP_COND, OP_SCOND, OP_ONCE, OP_ASSERT_NA, OP_ASSERTBACK_NA
and OP_SCRIPT_RUN. A leading OP_BRAZERO or OP_BRAMINZERO in front of the group
is accepted and remembered in "bra".

Arguments:
  common      the compiler state
  current     the backtrack record of the group; it is accessed through
              CURRENT_AS(bracket_backtrack)

Returns:      nothing; the function returns early when the sljit compiler
              reports an error after an alternative has been compiled

Side effects: for OP_ONCE the u.framesize field of the backtrack record is
              decoded in place (see below); current->top, current->topbacktracks
              and current->nextbacktracks are cleared before each further
              alternative is compiled.

The statement order below is the order in which machine code is emitted, so
it must not be rearranged. */

static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;
struct sljit_put_label *put_label = NULL;

/* Step over an OP_BRAZERO / OP_BRAMINZERO prefix, keeping it in bra. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily points at the closing ket of the group. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  /* A plain OP_KET with private data describes a counted repeat: the
  counter slot, the repeat type and the repeat count are read from the
  private data of the ket. OP_UPTO and OP_MINUPTO repeats are handled from
  here on like OP_KETRMAX and OP_KETRMIN respectively. */
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From now on ccbegin is the start of the group and cc the end of its
first branch (an OP_ALT or the ket). */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* For conditional groups the test is different: there is something to do for
the "alternative" when the condition is an assertion or when a condfailed
jump list was recorded. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* offset is twice the 2-unit value stored after the link of a capturing
bracket; OVECTOR(offset) and OVECTOR(offset + 1) are the pair restored below. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* An OP_COND closed by a repeating ket is treated as OP_SCOND. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize: the lowest bit is the flag,
the remaining bits are the frame size. The field is updated in place. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Counted repeat handled as KETRMAX / KETRMIN: pop the saved counter and write
it back to its slot (plus one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* Pop one word; when it is zero, control goes to the JUMPHERE(brazero)
    at the end of the function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* The KETRMIN code at the end of the function jumps back to this label. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* Reset the counter to 1; the OP_EXACT code at the end of the function
  loops back to exact_label. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Restore the values of a capturing bracket that were saved on the stack. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    /* Three words: capture_last, then the two ovector entries. */
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    /* Two words: the two ovector entries. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reload STACK_TOP from the private data slot and call the shared
    revertframes routine. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
    }
  /* Unconditional jump over the code generated below; its target is set by
  JUMPHERE(once) in the OP_ONCE exit code. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (has_alternatives)
  {
  /* Pop the value that selects the backtracking code of the alternative that
  matched. With more than three alternatives it is a code address that is
  jumped to directly, otherwise it is a small index compared step by step. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 3)
    {
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);

    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }

/* Backtracking code of the first branch. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      /* Positive assertion condition with a frame: revert the frame and
      restore the previous value of the assertion's private data slot. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  alt_count = 1;
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        /* Reload the subject position at which the group was entered. */
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;

      if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (opcode == OP_SCRIPT_RUN)
        match_script_run_common(common, private_data_ptr, current);
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack words needed. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the allocated words; stacksize is now the index of
    the next word to write. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Record which alternative matched: its index, or (more than three
    alternatives) the address of its backtracking code. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
      else
        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
      }

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Entry point of the backtracking code of this alternative. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        {
        JUMPHERE(next_alt);
        alt_count++;
        if (alt_count < alt_max)
          {
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
          }
        }
      else
        {
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      /* Negative assertion condition with a frame: same frame revert as for
      the positive case above. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Every alternative has failed: undo what entering the group saved. */
if (offset != 0)
  {
  /* Using both tmp registers is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* Target of the jump emitted when the OP_ONCE group was entered above. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Repeat handling: decide whether to go round again or to leave the group. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and loop while it is <= repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
12817
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12818 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12819 {
12820 DEFINE_COMPILER;
12821 int offset;
12822 struct sljit_jump *jump;
12823
12824 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12825 {
12826 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12827 {
12828 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12829 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12830 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12832 if (common->capture_last_ptr != 0)
12833 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12834 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12835 if (common->capture_last_ptr != 0)
12836 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12837 }
12838 set_jumps(current->topbacktracks, LABEL());
12839 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12840 return;
12841 }
12842
12843 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12844 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12845 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12846
12847 if (current->topbacktracks)
12848 {
12849 jump = JUMP(SLJIT_JUMP);
12850 set_jumps(current->topbacktracks, LABEL());
12851 /* Drop the stack frame. */
12852 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12853 JUMPHERE(jump);
12854 }
12855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12856 }
12857
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)12858 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12859 {
12860 assert_backtrack backtrack;
12861
12862 current->top = NULL;
12863 current->topbacktracks = NULL;
12864 current->nextbacktracks = NULL;
12865 if (current->cc[1] > OP_ASSERTBACK_NOT)
12866 {
12867 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12868 compile_bracket_matchingpath(common, current->cc, current);
12869 compile_bracket_backtrackingpath(common, current->top);
12870 }
12871 else
12872 {
12873 memset(&backtrack, 0, sizeof(backtrack));
12874 backtrack.common.cc = current->cc;
12875 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12876 /* Manual call of compile_assert_matchingpath. */
12877 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12878 }
12879 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12880 }
12881
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)12882 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12883 {
12884 DEFINE_COMPILER;
12885 PCRE2_UCHAR opcode = *current->cc;
12886 struct sljit_label *loop;
12887 struct sljit_jump *jump;
12888
12889 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12890 {
12891 if (common->then_trap != NULL)
12892 {
12893 SLJIT_ASSERT(common->control_head_ptr != 0);
12894
12895 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12896 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12897 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12898 jump = JUMP(SLJIT_JUMP);
12899
12900 loop = LABEL();
12901 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12902 JUMPHERE(jump);
12903 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12904 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12905 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12906 return;
12907 }
12908 else if (!common->local_quit_available && common->in_positive_assertion)
12909 {
12910 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12911 return;
12912 }
12913 }
12914
12915 if (common->local_quit_available)
12916 {
12917 /* Abort match with a fail. */
12918 if (common->quit_label == NULL)
12919 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12920 else
12921 JUMPTO(SLJIT_JUMP, common->quit_label);
12922 return;
12923 }
12924
12925 if (opcode == OP_SKIP_ARG)
12926 {
12927 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
12928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12929 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
12930 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
12931
12932 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
12933 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
12934 return;
12935 }
12936
12937 if (opcode == OP_SKIP)
12938 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12939 else
12940 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
12941 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
12942 }
12943
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)12944 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12945 {
12946 DEFINE_COMPILER;
12947 struct sljit_jump *jump;
12948 int size;
12949
12950 if (CURRENT_AS(then_trap_backtrack)->then_trap)
12951 {
12952 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
12953 return;
12954 }
12955
12956 size = CURRENT_AS(then_trap_backtrack)->framesize;
12957 size = 3 + (size < 0 ? 0 : size);
12958
12959 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
12960 free_stack(common, size);
12961 jump = JUMP(SLJIT_JUMP);
12962
12963 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
12964 /* STACK_TOP is set by THEN. */
12965 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
12966 {
12967 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12968 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
12969 }
12970 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12971 free_stack(common, 3);
12972
12973 JUMPHERE(jump);
12974 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
12975 }
12976
/* Generate the backtracking paths for a chain of backtrack records.

Arguments:
  common      the compiler state
  current     the first record; the chain is followed through ->prev until
              it ends (a NULL argument generates nothing)

For each record, any pending nextbacktracks jumps are bound to the current
code position and the record is then handled according to the opcode at
current->cc. common->then_trap is saved on entry and restored on exit, so a
trap installed while processing the chain does not leak to the caller. */

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Restore the saved OVECTOR(0) value from the stack. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character repeats. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* ... caseless. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* ... negated. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* ... negated, caseless. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type repeats. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Character classes. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO tells an assertion from a bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS: case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* Restore the saved mark pointer. When the pattern has a SKIP with an
    argument, five words were saved instead of one: the mark value is in
    word 4 and the control head in word 0. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    /* Without a local quit the return register is loaded with
    PCRE2_ERROR_NOMATCH before leaving through the quit label / list. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo; only bind the recorded jumps. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13167
compile_recurse(compiler_common * common)13168 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13169 {
13170 DEFINE_COMPILER;
13171 PCRE2_SPTR cc = common->start + common->currententry->start;
13172 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13173 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13174 BOOL needs_control_head;
13175 BOOL has_quit;
13176 BOOL has_accept;
13177 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
13178 int alt_count, alt_max, local_size;
13179 backtrack_common altbacktrack;
13180 jump_list *match = NULL;
13181 struct sljit_jump *next_alt = NULL;
13182 struct sljit_jump *accept_exit = NULL;
13183 struct sljit_label *quit;
13184 struct sljit_put_label *put_label = NULL;
13185
13186 /* Recurse captures then. */
13187 common->then_trap = NULL;
13188
13189 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13190
13191 alt_max = no_alternatives(cc);
13192 alt_count = 0;
13193
13194 /* Matching path. */
13195 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13196 common->currententry->entry_label = LABEL();
13197 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13198
13199 sljit_emit_fast_enter(compiler, TMP2, 0);
13200 count_match(common);
13201
13202 local_size = (alt_max > 1) ? 2 : 1;
13203
13204 /* (Reversed) stack layout:
13205 [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13206
13207 allocate_stack(common, private_data_size + local_size);
13208 /* Save return address. */
13209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13210
13211 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
13212
13213 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
13214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13215
13216 if (needs_control_head)
13217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13218
13219 if (alt_max > 1)
13220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13221
13222 memset(&altbacktrack, 0, sizeof(backtrack_common));
13223 common->quit_label = NULL;
13224 common->accept_label = NULL;
13225 common->quit = NULL;
13226 common->accept = NULL;
13227 altbacktrack.cc = ccbegin;
13228 cc += GET(cc, 1);
13229 while (1)
13230 {
13231 altbacktrack.top = NULL;
13232 altbacktrack.topbacktracks = NULL;
13233
13234 if (altbacktrack.cc != ccbegin)
13235 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13236
13237 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13238 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13239 return;
13240
13241 allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
13242 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13243
13244 if (alt_max > 1 || has_accept)
13245 {
13246 if (alt_max > 3)
13247 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13248 else
13249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13250 }
13251
13252 add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13253
13254 if (alt_count == 0)
13255 {
13256 /* Backtracking path entry. */
13257 SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13258 common->currententry->backtrack_label = LABEL();
13259 set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13260
13261 sljit_emit_fast_enter(compiler, TMP1, 0);
13262
13263 if (has_accept)
13264 accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13265
13266 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13267 /* Save return address. */
13268 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13269
13270 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13271
13272 if (alt_max > 1)
13273 {
13274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13275 free_stack(common, 2);
13276
13277 if (alt_max > 3)
13278 {
13279 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13280 sljit_set_put_label(put_label, LABEL());
13281 sljit_emit_op0(compiler, SLJIT_ENDBR);
13282 }
13283 else
13284 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13285 }
13286 else
13287 free_stack(common, has_accept ? 2 : 1);
13288 }
13289 else if (alt_max > 3)
13290 {
13291 sljit_set_put_label(put_label, LABEL());
13292 sljit_emit_op0(compiler, SLJIT_ENDBR);
13293 }
13294 else
13295 {
13296 JUMPHERE(next_alt);
13297 if (alt_count + 1 < alt_max)
13298 {
13299 SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13300 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13301 }
13302 }
13303
13304 alt_count++;
13305
13306 compile_backtrackingpath(common, altbacktrack.top);
13307 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13308 return;
13309 set_jumps(altbacktrack.topbacktracks, LABEL());
13310
13311 if (*cc != OP_ALT)
13312 break;
13313
13314 altbacktrack.cc = cc + 1 + LINK_SIZE;
13315 cc += GET(cc, 1);
13316 }
13317
13318 /* No alternative is matched. */
13319
13320 quit = LABEL();
13321
13322 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
13323
13324 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13325 free_stack(common, private_data_size + local_size);
13326 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13327 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13328
13329 if (common->quit != NULL)
13330 {
13331 SLJIT_ASSERT(has_quit);
13332
13333 set_jumps(common->quit, LABEL());
13334 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13335 copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
13336 JUMPTO(SLJIT_JUMP, quit);
13337 }
13338
13339 if (has_accept)
13340 {
13341 JUMPHERE(accept_exit);
13342 free_stack(common, 2);
13343
13344 /* Save return address. */
13345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13346
13347 copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
13348
13349 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13350 free_stack(common, private_data_size + local_size);
13351 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13352 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13353 }
13354
13355 if (common->accept != NULL)
13356 {
13357 SLJIT_ASSERT(has_accept);
13358
13359 set_jumps(common->accept, LABEL());
13360
13361 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13362 OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13363
13364 allocate_stack(common, 2);
13365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13366 }
13367
13368 set_jumps(match, LABEL());
13369
13370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13371
13372 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13373
13374 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13375 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13376 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13377 }
13378
13379 #undef COMPILE_BACKTRACKINGPATH
13380 #undef CURRENT_AS
13381
13382 #define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
13383 (PCRE2_JIT_INVALID_UTF)
13384
/* Generates the machine code for one matching mode of a compiled pattern and
records it in the pattern's executable_functions descriptor
(re->executable_jit), which is allocated here when it does not exist yet.

Arguments:
  code    the compiled pattern
  mode    the JIT mode to compile. PCRE2_JIT_INVALID_UTF may be ORed in; it is
          copied into common->invalid_utf (Unicode builds only) and then
          masked out. The remaining value is compared with PCRE2_JIT_COMPLETE
          and PCRE2_JIT_PARTIAL_SOFT; any other value selects the third
          (partial hard) result slot.

Returns:  0 on success
          PCRE2_ERROR_INTERNAL for an unknown newline convention
          PCRE2_ERROR_NOMEMORY for every other failure: an allocation
            failure, check_opcode_types() returning false, a local frame
            larger than SLJIT_MAX_LOCAL_SIZE, or an sljit compiler error

Each failure return releases the tables and the compiler that this function
created before that point.
*/

static int jit_compile(pcre2_code *code, sljit_u32 mode)
{
pcre2_real_code *re = (pcre2_real_code *)code;
struct sljit_compiler *compiler;
backtrack_common rootbacktrack;
compiler_common common_data;
compiler_common *common = &common_data;
const sljit_u8 *tables = re->tables;
void *allocator_data = &re->memctl;
int private_data_size;
PCRE2_SPTR ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
/* Labels and jumps that are only created on some paths start as NULL so the
later "!= NULL" tests can tell whether they exist. */
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
struct sljit_label *empty_match_backtrack_label = NULL;
struct sljit_label *reset_match_label;
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *empty_match = NULL;
struct sljit_jump *end_anchor_failed = NULL;
jump_list *reqcu_not_found = NULL;

SLJIT_ASSERT(tables);

/* Sanity check: the build-time HAS_VIRTUAL_REGISTERS setting must agree with
what sljit reports for TMP3, ARGUMENTS and RETURN_ADDR. */
#if HAS_VIRTUAL_REGISTERS == 1
SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
#elif HAS_VIRTUAL_REGISTERS == 0
SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
#else
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
#endif

/* Everything in the shared state starts as zero/NULL; many tests below rely
on "field == 0" meaning "feature not used". */
memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
common->re = re;
/* The name table follows the pcre2_real_code header, and the compiled code
follows the name table. */
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;

/* Pick up the configuration bit first, then strip it so that "mode" can be
compared directly with the PCRE2_JIT_* mode values. */
#ifdef SUPPORT_UNICODE
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
#endif /* SUPPORT_UNICODE */
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;

common->start = rootbacktrack.cc;
common->read_only_data_head = NULL;
common->fcc = tables + fcc_offset;
common->lcc = (sljit_sw)(tables + lcc_offset);
common->mode = mode;
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
/* Translate the newline convention. The CRLF, ANY and ANYCRLF cases store
CR in the high byte and NL in the low byte; ANY and ANYCRLF also change the
newline type. */
common->nltype = NLTYPE_FIXED;
switch(re->newline_convention)
  {
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
  default: return PCRE2_ERROR_INTERNAL;
  }
/* Default newline bounds; narrowed below when the pattern is in UTF mode. */
common->nlmax = READ_CHAR_MAX;
common->nlmin = 0;
/* \R handling: an explicit convention in the pattern wins, otherwise the
build-time BSR_ANYCRLF default applies. */
if (re->bsr_convention == PCRE2_BSR_UNICODE)
  common->bsr_nltype = NLTYPE_ANY;
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
  common->bsr_nltype = NLTYPE_ANYCRLF;
else
  {
#ifdef BSR_ANYCRLF
  common->bsr_nltype = NLTYPE_ANYCRLF;
#else
  common->bsr_nltype = NLTYPE_ANY;
#endif
  }
common->bsr_nlmax = READ_CHAR_MAX;
common->bsr_nlmin = 0;
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
common->ctypes = (sljit_sw)(tables + ctypes_offset);
common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
#ifdef SUPPORT_UNICODE
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->overall_options & PCRE2_UTF) != 0;
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
if (common->utf)
  {
  if (common->nltype == NLTYPE_ANY)
    common->nlmax = 0x2029;
  else if (common->nltype == NLTYPE_ANYCRLF)
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  else
    {
    /* We only care about the first newline character. */
    common->nlmax = common->newline & 0xff;
    }

  if (common->nltype == NLTYPE_FIXED)
    common->nlmin = common->newline & 0xff;
  else
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;

  if (common->bsr_nltype == NLTYPE_ANY)
    common->bsr_nlmax = 0x2029;
  else
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  }
else
  /* Invalid UTF support is only kept when the pattern is in UTF mode. */
  common->invalid_utf = FALSE;
#endif /* SUPPORT_UNICODE */
ccend = bracketend(common->start);

/* Calculate the local space size on the stack. ovector_start is a running
byte offset into the local frame (the emitted code addresses these slots
relative to SLJIT_SP); every optional slot claimed below advances it by one
sljit_sw. */
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
/* One byte per capturing bracket, plus one for index 0. */
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
if (!common->optimized_cbracket)
  return PCRE2_ERROR_NOMEMORY;
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
#else
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
#endif

SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
common->capture_last_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
#endif
/* NOTE(review): check_opcode_types() is defined elsewhere; it presumably
sets the feature fields tested below (control_head_ptr, has_set_som,
capture_last_ptr, has_then, ...), since this function does not assign them
after the memset - confirm. A false result is reported as NOMEMORY. */
if (!check_opcode_types(common, common->start, ccend))
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Checking flags and updating ovector_start. */
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  common->req_char_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (mode != PCRE2_JIT_COMPLETE)
  {
  /* Partial matching modes need start_used_ptr; soft partial matching
  additionally needs hit_start. */
  common->start_used_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
    {
    common->hit_start = common->ovector_start;
    common->ovector_start += sizeof(sljit_sw);
    }
  }
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
  {
  common->match_end_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
common->control_head_ptr = 1;
#endif
/* A non-zero control_head_ptr at this point is only a request flag; it is
replaced by the real frame offset here. */
if (common->control_head_ptr != 0)
  {
  common->control_head_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (common->has_set_som)
  {
  /* Saving the real start pointer is necessary. */
  common->start_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

/* Aligning ovector to even number of sljit words. */
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
  common->ovector_start += sizeof(sljit_sw);

/* Without a dedicated slot the start pointer shares OVECTOR(0). */
if (common->start_ptr == 0)
  common->start_ptr = OVECTOR(0);

/* Capturing brackets cannot be optimized if callouts are allowed. */
if (common->capture_last_ptr != 0)
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);

SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
/* cbra_ptr is placed after (top_bracket + 1) pairs of words counted from
OVECTOR_START. */
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);

/* One sljit_s32 per element of the compiled code, plus one extra byte per
element when has_then is set; that byte area becomes then_offsets below. */
total_length = ccend - common->start;
common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
if (!common->private_data_ptrs)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
/* Only the sljit_s32 part is cleared here; the then_offsets bytes are
cleared separately when they are set up. */
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));

/* The private data area starts after one word per bracket following
cbra_ptr; the two calls below may enlarge private_data_size. */
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
  detect_early_fail(common, common->start, &private_data_size, 0, 0);

SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);

/* The whole local frame must fit into what sljit allows. */
if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
  {
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if (common->has_then)
  {
  /* then_offsets lives in the tail of the private_data_ptrs allocation. */
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
  memset(common->then_offsets, 0, total_length);
  set_then_offsets(common, common->start, NULL);
  }

compiler = sljit_create_compiler(allocator_data);
if (!compiler)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
common->compiler = compiler;

/* From here on the order of the emit calls is the order of the generated
code, so statements must not be rearranged. */

/* Main pcre_jit_exec entry. */
sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);

/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
if (common->req_char_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);

/* The incoming argument (SLJIT_S0) points to a jit_arguments structure;
load the subject pointers, the stack descriptor and the match limit. */
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
/* The stored limit is limit_match + 1. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);

if (common->early_fail_start_ptr < common->early_fail_end_ptr)
  reset_early_fail(common);

/* Initial values of the optional frame slots: -1 in hit_start, 0 in the
mark and control head slots. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Main part of the matching */
if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  mainloop_label = mainloop_entry(common);
  continue_match_label = LABEL();
  /* Forward search if possible. */
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
    {
    /* At most one fast-forward strategy is used; the first applicable one
    in this chain wins. */
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
      ;
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
      fast_forward_first_char(common);
    else if ((re->flags & PCRE2_STARTLINE) != 0)
      fast_forward_newline(common);
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
      fast_forward_start_bits(common);
    }
  }
else
  continue_match_label = LABEL();

/* Minimum length check: NOMATCH is preloaded into the return register and
the jump is later bound to the abort label. */
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
  }
if (common->req_char_ptr != 0)
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);

/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
/* Copy the limit of allowed recursions. */
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
if (common->capture_last_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
if (common->fast_forward_bc_ptr != NULL)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);

if (common->start_ptr != OVECTOR(0))
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);

/* Copy the beginning of the string. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* start_used_ptr is only written while hit_start still holds -1. */
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(jump);
  }
else if (mode == PCRE2_JIT_PARTIAL_HARD)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

/* Emit the matching path of the whole pattern. */
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);

if (common->might_be_empty)
  {
  /* An empty match (STR_PTR equals the stored start) is diverted to the
  option checks emitted near the end of this function. */
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  empty_match_found_label = LABEL();
  }

common->accept_label = LABEL();
if (common->accept != NULL)
  set_jumps(common->accept, common->accept_label);

/* This means we have a match. Update the ovector. */
copy_ovector(common, re->top_bracket + 1);
/* The quit and abort labels are the same location: the common return. */
common->quit_label = common->abort_label = LABEL();
if (common->quit != NULL)
  set_jumps(common->quit, common->quit_label);
if (common->abort != NULL)
  set_jumps(common->abort, common->abort_label);
if (minlength_check_failed != NULL)
  SET_LABEL(minlength_check_failed, common->abort_label);

sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);

if (common->failed_match != NULL)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
  set_jumps(common->failed_match, LABEL());
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  JUMPTO(SLJIT_JUMP, common->abort_label);
  }

/* A failed end-anchor check continues with the code emitted below. */
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  JUMPHERE(end_anchor_failed);

if (mode != PCRE2_JIT_COMPLETE)
  {
  common->partialmatchlabel = LABEL();
  set_jumps(common->partialmatch, common->partialmatchlabel);
  return_with_partial_match(common, common->quit_label);
  }

/* Emit the backtracking path of the whole pattern. */
if (common->might_be_empty)
  empty_match_backtrack_label = LABEL();
compile_backtrackingpath(common, rootbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

SLJIT_ASSERT(rootbacktrack.prev == NULL);
reset_match_label = LABEL();

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Update hit_start only in the first time. */
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
  JUMPHERE(jump);
  }

/* Check we have remaining characters. TMP1 holds the match end limit when a
match_end_ptr slot exists; it is used by the comparisons below. */
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Reload STR_PTR from the fast-forward slot if there is one, otherwise from
the saved start pointer. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);

if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  if (common->ff_newline_shortcut != NULL)
    {
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
      {
      if (common->match_end_ptr != 0)
        {
        /* STR_END is temporarily replaced by the limit in TMP1 for the
        jump to the shortcut; TMP3 restores it on the fall-through path. */
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
        }
      else
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
      }
    }
  else
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
  }

/* No more remaining characters. */
if (reqcu_not_found != NULL)
  set_jumps(reqcu_not_found, LABEL());

/* In soft partial mode a hit_start other than -1 leads to the partial match
return instead of NOMATCH. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);

OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
JUMPTO(SLJIT_JUMP, common->quit_label);

flush_stubs(common);

if (common->might_be_empty)
  {
  /* Empty match handling: PCRE2_NOTEMPTY always rejects the match;
  PCRE2_NOTEMPTY_ATSTART rejects it only when STR_PTR equals the "str" field
  of the arguments; otherwise the match is accepted. */
  JUMPHERE(empty_match);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
  }

/* Compile the recursion entries. Main-pattern-only state is cleared first,
and the main quit label is saved and restored around the loop. */
common->fast_forward_bc_ptr = NULL;
common->early_fail_start_ptr = 0;
common->early_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_quit_available = TRUE;
quit_label = common->quit_label;
while (common->currententry != NULL)
  {
  /* Might add new entries. */
  compile_recurse(common);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    sljit_free_compiler(compiler);
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  flush_stubs(common);
  common->currententry = common->currententry->next;
  }
common->local_quit_available = FALSE;
common->quit_label = quit_label;

/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
/* This is a (really) rare case. */
set_jumps(common->stackalloc, LABEL());
/* RETURN_ADDR is not a saved register. */
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* R0 and R1 carry the two call arguments (the stack descriptor and
STACK_LIMIT - STACK_GROWTH_RATE), so STR_PTR (== R1) is saved in LOCALS1 and
TMP2 is parked in STACK_LIMIT across the call. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);

sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));

/* A zero result is treated as failure. Otherwise TMP2 is restored, the
result becomes the new STACK_LIMIT, and control returns to the saved
address. */
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

/* Allocation failed. */
JUMPHERE(jump);
/* We break the return address cache here, but this is a really rare case. */
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Call limit reached. */
set_jumps(common->calllimit, LABEL());
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Shared helper routines. Each one is emitted only when its jump list is
non-empty, i.e. when some generated code refers to it. */
if (common->revertframes != NULL)
  {
  set_jumps(common->revertframes, LABEL());
  do_revertframes(common);
  }
if (common->wordboundary != NULL)
  {
  set_jumps(common->wordboundary, LABEL());
  check_wordboundary(common);
  }
if (common->anynewline != NULL)
  {
  set_jumps(common->anynewline, LABEL());
  check_anynewline(common);
  }
if (common->hspace != NULL)
  {
  set_jumps(common->hspace, LABEL());
  check_hspace(common);
  }
if (common->vspace != NULL)
  {
  set_jumps(common->vspace, LABEL());
  check_vspace(common);
  }
if (common->casefulcmp != NULL)
  {
  set_jumps(common->casefulcmp, LABEL());
  do_casefulcmp(common);
  }
if (common->caselesscmp != NULL)
  {
  set_jumps(common->caselesscmp, LABEL());
  do_caselesscmp(common);
  }
if (common->reset_match != NULL)
  {
  /* After do_reset_match(), matching continues at continue_match_label when
  STR_PTR is greater than TMP1; otherwise STR_PTR is set to TMP1 and control
  goes to reset_match_label. */
  set_jumps(common->reset_match, LABEL());
  do_reset_match(common, (re->top_bracket + 1) * 2);
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  JUMPTO(SLJIT_JUMP, reset_match_label);
  }
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utfreadchar != NULL)
  {
  set_jumps(common->utfreadchar, LABEL());
  do_utfreadchar(common);
  }
if (common->utfreadtype8 != NULL)
  {
  set_jumps(common->utfreadtype8, LABEL());
  do_utfreadtype8(common);
  }
if (common->utfpeakcharback != NULL)
  {
  set_jumps(common->utfpeakcharback, LABEL());
  do_utfpeakcharback(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
if (common->utfreadchar_invalid != NULL)
  {
  set_jumps(common->utfreadchar_invalid, LABEL());
  do_utfreadchar_invalid(common);
  }
if (common->utfreadnewline_invalid != NULL)
  {
  set_jumps(common->utfreadnewline_invalid, LABEL());
  do_utfreadnewline_invalid(common);
  }
if (common->utfmoveback_invalid)
  {
  set_jumps(common->utfmoveback_invalid, LABEL());
  do_utfmoveback_invalid(common);
  }
if (common->utfpeakcharback_invalid)
  {
  set_jumps(common->utfpeakcharback_invalid, LABEL());
  do_utfpeakcharback_invalid(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
if (common->getucd != NULL)
  {
  set_jumps(common->getucd, LABEL());
  do_getucd(common);
  }
if (common->getucdtype != NULL)
  {
  set_jumps(common->getucdtype, LABEL());
  do_getucdtype(common);
  }
#endif /* SUPPORT_UNICODE */

/* The compile-time tables are no longer needed once all code is emitted. */
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);

/* The code size must be read before the compiler is freed. */
executable_func = sljit_generate_code(compiler);
executable_size = sljit_get_generated_code_size(compiler);
sljit_free_compiler(compiler);

if (executable_func == NULL)
  {
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Reuse the function descriptor if possible. */
if (re->executable_jit != NULL)
  functions = (executable_functions *)re->executable_jit;
else
  {
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
  if (functions == NULL)
    {
    /* This case is highly unlikely since we just recently
    freed a lot of memory. Not impossible though. */
    sljit_free_code(executable_func);
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  memset(functions, 0, sizeof(executable_functions));
  functions->top_bracket = re->top_bracket + 1;
  functions->limit_match = re->limit_match;
  re->executable_jit = functions;
  }

/* Turn mode into an index: 0 for complete, 1 for soft partial, 2 for any
other value (hard partial). */
if (mode == PCRE2_JIT_COMPLETE)
  mode = 0;
else
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;

/* Store the code, its read-only data and its size in the slot for this
mode. */
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
functions->executable_funcs[mode] = executable_func;
functions->read_only_data_heads[mode] = common->read_only_data_head;
functions->executable_sizes[mode] = executable_size;
return 0;
}
14031
14032 #endif
14033
14034 /*************************************************
14035 * JIT compile a Regular Expression *
14036 *************************************************/
14037
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
14040
14041 Arguments:
14042 code a compiled pattern
14043 options JIT option bits
14044
14045 Returns: 0: success or (*NOJIT) was used
14046 <0: an error code
14047 */
14048
14049 #define PUBLIC_JIT_COMPILE_OPTIONS \
14050 (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14051
14052 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14053 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14054 {
14055 pcre2_real_code *re = (pcre2_real_code *)code;
14056
14057 if (code == NULL)
14058 return PCRE2_ERROR_NULL;
14059
14060 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14061 return PCRE2_ERROR_JIT_BADOPTION;
14062
14063 /* Support for invalid UTF was first introduced in JIT, with the option
14064 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14065 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14066 preferred feature, with the earlier option deprecated. However, for backward
14067 compatibility, if the earlier option is set, it forces the new option so that
14068 if JIT matching falls back to the interpreter, there is still support for
14069 invalid UTF. However, if this function has already been successfully called
14070 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14071 non-invalid-supporting JIT code was compiled), give an error.
14072
14073 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14074 actions are needed:
14075
14076 1. Remove the definition from pcre2.h.in and from the list in
14077 PUBLIC_JIT_COMPILE_OPTIONS above.
14078
14079 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14080
14081 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14082
14083 4. Delete the following short block of code. The setting of "re" and
14084 "functions" can be moved into the JIT-only block below, but if that is
14085 done, (void)re and (void)functions will be needed in the non-JIT case, to
14086 avoid compiler warnings.
14087 */
14088
14089 #ifdef SUPPORT_JIT
14090 executable_functions *functions = (executable_functions *)re->executable_jit;
14091 static int executable_allocator_is_working = 0;
14092 #endif
14093
14094 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14095 {
14096 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14097 {
14098 #ifdef SUPPORT_JIT
14099 if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14100 #endif
14101 re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14102 }
14103 }
14104
14105 /* The above tests are run with and without JIT support. This means that
14106 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14107 interpreter support) even in the absence of JIT. But now, if there is no JIT
14108 support, give an error return. */
14109
14110 #ifndef SUPPORT_JIT
14111 return PCRE2_ERROR_JIT_BADOPTION;
14112 #else /* SUPPORT_JIT */
14113
14114 /* There is JIT support. Do the necessary. */
14115
14116 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14117
14118 if (executable_allocator_is_working == 0)
14119 {
14120 /* Checks whether the executable allocator is working. This check
14121 might run multiple times in multi-threaded environments, but the
14122 result should not be affected by it. */
14123 void *ptr = SLJIT_MALLOC_EXEC(32);
14124
14125 executable_allocator_is_working = -1;
14126
14127 if (ptr != NULL)
14128 {
14129 SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr));
14130 executable_allocator_is_working = 1;
14131 }
14132 }
14133
14134 if (executable_allocator_is_working < 0)
14135 return PCRE2_ERROR_NOMEMORY;
14136
14137 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14138 options |= PCRE2_JIT_INVALID_UTF;
14139
14140 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14141 || functions->executable_funcs[0] == NULL)) {
14142 uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14143 int result = jit_compile(code, options & ~excluded_options);
14144 if (result != 0)
14145 return result;
14146 }
14147
14148 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14149 || functions->executable_funcs[1] == NULL)) {
14150 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14151 int result = jit_compile(code, options & ~excluded_options);
14152 if (result != 0)
14153 return result;
14154 }
14155
14156 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14157 || functions->executable_funcs[2] == NULL)) {
14158 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14159 int result = jit_compile(code, options & ~excluded_options);
14160 if (result != 0)
14161 return result;
14162 }
14163
14164 return 0;
14165
14166 #endif /* SUPPORT_JIT */
14167 }
14168
14169 /* The JIT compiler uses an all-in-one approach (see the #includes below). This
14170 improves security, since the code generator functions are not exported. */
14171
14172 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14173
14174 #include "pcre2_jit_match.c"
14175 #include "pcre2_jit_misc.c"
14176
14177 /* End of pcre2_jit_compile.c */
14178