1 #include "engine.h"
2 #include "regnodes.h"
3 #include "regcomp.h"
4 #include <stdio.h>
5 #include <string.h>
6 #include <assert.h>
7
8 #if PERL_API_REVISION != 5
9 #error This module is only for Perl 5
10 #else
11 #if PERL_API_VERSION == 30
12 #define RC_EXACT_ONLY8
13 #else
14 #if PERL_API_VERSION == 32
15 #define RC_EXACT_REQ8
16 #define RC_ANYOFR
17 #else
18 #if PERL_API_VERSION == 34
19 #define RC_EXACT_REQ8
20 #define RC_ANYOFR
21 #else
22 #error Unsupported PERL_API_VERSION
23 #endif
24 #endif
25 #endif
26 #endif
27
28 #define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0]))
29
30 #define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c))
31
32 #define LETTER_COUNT ('z' - 'a' + 1)
33
34 #define INFINITE_COUNT 0xffff
35
36 #define ALNUM_BLOCK 0x0001
37 #define SPACE_BLOCK 0x0002
38 #define ALPHA_BLOCK 0x0004
39 #define NUMBER_BLOCK 0x0008
40 #define UPPER_BLOCK 0x0010
41 #define LOWER_BLOCK 0x0020
42 #define HEX_DIGIT_BLOCK 0x0040
43 #define HORIZONTAL_SPACE_BLOCK 0x0080
44 #define VERTICAL_SPACE_BLOCK 0x0100
45
46 #define MIRROR_SHIFT 16
47 #define NOT_ALNUM_BLOCK (ALNUM_BLOCK << MIRROR_SHIFT)
48 #define NOT_SPACE_BLOCK (SPACE_BLOCK << MIRROR_SHIFT)
49 #define NOT_ALPHA_BLOCK (ALPHA_BLOCK << MIRROR_SHIFT)
50 #define NOT_NUMBER_BLOCK (NUMBER_BLOCK << MIRROR_SHIFT)
51 #define NOT_UPPER_BLOCK (UPPER_BLOCK << MIRROR_SHIFT)
52 #define NOT_LOWER_BLOCK (LOWER_BLOCK << MIRROR_SHIFT)
53 #define NOT_HEX_DIGIT_BLOCK (HEX_DIGIT_BLOCK << MIRROR_SHIFT)
54 #define NOT_HORIZONTAL_SPACE_BLOCK (HORIZONTAL_SPACE_BLOCK << MIRROR_SHIFT)
55 #define NOT_VERTICAL_SPACE_BLOCK (VERTICAL_SPACE_BLOCK << MIRROR_SHIFT)
56
57 #define EVERY_BLOCK 0x01ff01ff
58
59 #define FORCED_BYTE 0x01
60 #define FORCED_CHAR 0x02
61 #define FORCED_MISMATCH (FORCED_BYTE | FORCED_CHAR)
62
63 #define MIRROR_BLOCK(b) ((((b) & 0xffff) << MIRROR_SHIFT) | ((b) >> MIRROR_SHIFT))
64
65 /* Regexp terms are normally regnodes, except for EXACT (and EXACTF)
66 nodes, which can bundle many characters, which we have to compare
67 separately. Occasionally, we also need access to extra regexp
68 data. */
69 typedef struct
70 {
71 regexp *origin;
72 regnode *rn;
73 int spent;
74 } Arrow;
75
76 #define GET_LITERAL(a) (((char *)((a)->rn + 1)) + (a)->spent)
77
78 #define GET_OFFSET(rn) ((rn)->next_off ? (rn)->next_off : get_synth_offset(rn))
79
80 typedef U16 CurlyCount;
81
82 /* Most functions below have this signature. The first parameter is a
83 flag set after the comparison actually matched something, second
84 parameter points into the first ("left") regexp passed into
85 rc_compare, the third into the second ("right") regexp. Return
86 value is 1 for match, 0 no match, -1 error (with the lowest-level
87 failing function setting rc_error before returning it). */
88 typedef int (*FCompare)(int, Arrow *, Arrow *);
89
90 /* Place of a char in regexp bitmap. */
91 typedef struct
92 {
93 int offs;
94 unsigned char mask;
95 } BitFlag;
96
97 /* Set of chars and its complement formatted for convenient
98 matching. */
99 typedef struct
100 {
101 char *expl;
102 int expl_size;
103 char lookup[256];
104 char nlookup[256];
105 unsigned char bitmap[ANYOF_BITMAP_SIZE];
106 unsigned char nbitmap[ANYOF_BITMAP_SIZE];
107 } ByteClass;
108
109 char *rc_error = 0;
110
111 unsigned char forced_byte[ANYOF_BITMAP_SIZE];
112
113 /* since Perl 5.18, \s matches vertical tab - see
114 * https://www.effectiveperlprogramming.com/2013/06/the-vertical-tab-is-part-of-s-in-perl-5-18/
115 * , or "Pattern White Space" in perlre */
116 static char whitespace_expl[] = { ' ', '\f', '\n', '\r', '\t', '\v' };
117
118 static ByteClass whitespace;
119
120 static char horizontal_whitespace_expl[] = { '\t', ' ' };
121
122 static ByteClass horizontal_whitespace;
123
124 static char vertical_whitespace_expl[] = { '\r', '\v', '\f', '\n' };
125
126 static ByteClass vertical_whitespace;
127
128 static char digit_expl[10];
129
130 static ByteClass digit;
131
132 static char xdigit_expl[10 + 2 * 6];
133
134 static ByteClass xdigit;
135
136 static char ndot_expl[] = { '\n' };
137
138 static ByteClass ndot;
139
140 static char alphanumeric_expl[11 + 2 * LETTER_COUNT];
141
142 static ByteClass word_bc;
143
144 static ByteClass alnum_bc;
145
146 static char alpha_expl[2 * LETTER_COUNT];
147
148 static ByteClass alpha_bc;
149
150 static char lower_expl[LETTER_COUNT];
151
152 static ByteClass lower_bc;
153
154 static char upper_expl[LETTER_COUNT];
155
156 static ByteClass upper_bc;
157
158 /* true flags for ALNUM and its subsets, 0 otherwise */
159 static unsigned char alphanumeric_classes[REGNODE_MAX];
160
161 /* true flags for NALNUM and its subsets, 0 otherwise */
162 static unsigned char non_alphanumeric_classes[REGNODE_MAX];
163
164 static unsigned char word_posix_regclasses[_CC_VERTSPACE + 1];
165
166 static unsigned char non_word_posix_regclasses[_CC_VERTSPACE + 1];
167
168 static unsigned char newline_posix_regclasses[_CC_VERTSPACE + 1];
169
170 /* Simplified hierarchy of character classes; ignoring the difference
171 between classes (i.e. IsAlnum & IsWord), which we probably
172 shouldn't - it is a documented bug, though... */
173 static char *regclass_names[] = { "Digit", "IsAlnum", "IsSpacePerl",
174 "IsHorizSpace", "IsVertSpace",
175 "IsWord", "IsXPosixAlnum", "IsXPosixXDigit",
176 "IsAlpha", "IsXPosixAlpha",
177 "IsDigit", "IsLower", "IsUpper",
178 "IsXDigit", "SpacePerl", "VertSpace",
179 "Word", "XPosixDigit",
180 "XPosixWord", "XPosixAlpha", "XPosixAlnum",
181 "XPosixXDigit" };
182
183 static U32 regclass_blocks[] = { NUMBER_BLOCK, ALNUM_BLOCK, SPACE_BLOCK,
184 HORIZONTAL_SPACE_BLOCK,
185 VERTICAL_SPACE_BLOCK, ALNUM_BLOCK,
186 ALNUM_BLOCK, HEX_DIGIT_BLOCK, ALPHA_BLOCK,
187 ALPHA_BLOCK, NUMBER_BLOCK, LOWER_BLOCK,
188 UPPER_BLOCK, HEX_DIGIT_BLOCK, SPACE_BLOCK,
189 VERTICAL_SPACE_BLOCK, ALNUM_BLOCK,
190 NUMBER_BLOCK, ALNUM_BLOCK, ALPHA_BLOCK,
191 ALNUM_BLOCK, HEX_DIGIT_BLOCK};
192
193 static U32 regclass_superset[] = { NOT_SPACE_BLOCK,
194 NOT_ALPHA_BLOCK, NOT_NUMBER_BLOCK,
195 ALNUM_BLOCK, ALNUM_BLOCK,
196 ALPHA_BLOCK, ALPHA_BLOCK, HEX_DIGIT_BLOCK,
197 SPACE_BLOCK, NOT_NUMBER_BLOCK,
198 NOT_HEX_DIGIT_BLOCK };
199 static U32 regclass_subset[] = { ALNUM_BLOCK,
200 NOT_ALNUM_BLOCK, NOT_ALNUM_BLOCK,
201 ALPHA_BLOCK, NUMBER_BLOCK,
202 UPPER_BLOCK, LOWER_BLOCK, NUMBER_BLOCK,
203 HORIZONTAL_SPACE_BLOCK,
204 VERTICAL_SPACE_BLOCK, VERTICAL_SPACE_BLOCK };
205
206 static U32 posix_regclass_blocks[_CC_VERTSPACE + 1] = {
207 ALNUM_BLOCK /* _CC_WORDCHAR == 0 */,
208 NUMBER_BLOCK /* _CC_DIGIT == 1 */,
209 ALPHA_BLOCK /* _CC_ALPHA == 2 */,
210 LOWER_BLOCK /* _CC_LOWER == 3 */,
211 UPPER_BLOCK /* _CC_UPPER == 4 */,
212 0,
213 0,
214 ALNUM_BLOCK /* _CC_ALPHANUMERIC == 7 */,
215 0,
216 0,
217 SPACE_BLOCK /* _CC_SPACE == 10 */,
218 HORIZONTAL_SPACE_BLOCK /* _CC_BLANK == 11, and according to perlrecharclass "\p{Blank}" and "\p{HorizSpace}" are synonyms. */,
219 HEX_DIGIT_BLOCK /* _CC_XDIGIT == 12 */
220 }; /* _CC_VERTSPACE set in rc_init because it has different values between perl 5.20 and 5.22 */
221
222 static unsigned char *posix_regclass_bitmaps[_CC_VERTSPACE + 1] = {
223 word_bc.bitmap,
224 digit.bitmap,
225 alpha_bc.bitmap,
226 lower_bc.bitmap,
227 upper_bc.bitmap,
228 0,
229 0,
230 alnum_bc.bitmap,
231 0,
232 0,
233 whitespace.bitmap,
234 horizontal_whitespace.bitmap,
235 xdigit.bitmap
236 };
237
238 static unsigned char *posix_regclass_nbitmaps[_CC_VERTSPACE + 1] = {
239 word_bc.nbitmap,
240 digit.nbitmap,
241 0,
242 0,
243 0,
244 0,
245 0,
246 alnum_bc.nbitmap,
247 0,
248 0,
249 whitespace.nbitmap,
250 horizontal_whitespace.nbitmap,
251 xdigit.nbitmap
252 };
253
254 static unsigned char trivial_nodes[REGNODE_MAX];
255
256 static FCompare dispatch[REGNODE_MAX][REGNODE_MAX];
257
258 static int compare(int anchored, Arrow *a1, Arrow *a2);
259 static int compare_right_branch(int anchored, Arrow *a1, Arrow *a2);
260 static int compare_right_curly(int anchored, Arrow *a1, Arrow *a2);
261
init_bit_flag(BitFlag * bf,int c)262 static void init_bit_flag(BitFlag *bf, int c)
263 {
264 assert(c >= 0);
265
266 bf->offs = c / 8;
267 bf->mask = 1 << (c % 8);
268 }
269
init_forced_byte()270 static void init_forced_byte()
271 {
272 char forced_byte_expl[] = { 'a', 'b', 'c', 'e', 'f', 'x' };
273 BitFlag bf;
274 int i;
275
276 memset(forced_byte, 0, sizeof(forced_byte));
277
278 for (i = 0; i < sizeof(forced_byte_expl); ++i)
279 {
280 init_bit_flag(&bf, (unsigned char)forced_byte_expl[i]);
281 forced_byte[bf.offs] |= bf.mask;
282 }
283
284 for (i = 0; i < 8; ++i)
285 {
286 init_bit_flag(&bf, (unsigned char)('0' + i));
287 forced_byte[bf.offs] |= bf.mask;
288 }
289 }
290
init_byte_class(ByteClass * bc,char * expl,int expl_size)291 static void init_byte_class(ByteClass *bc, char *expl, int expl_size)
292 {
293 BitFlag bf;
294 int i;
295
296 bc->expl = expl;
297 bc->expl_size = expl_size;
298
299 memset(bc->lookup, 0, sizeof(bc->lookup));
300 memset(bc->nlookup, 1, sizeof(bc->nlookup));
301 memset(bc->bitmap, 0, sizeof(bc->bitmap));
302 memset(bc->nbitmap, 0xff, sizeof(bc->nbitmap));
303
304 for (i = 0; i < expl_size; ++i)
305 {
306 bc->lookup[(unsigned char)expl[i]] = 1;
307 bc->nlookup[(unsigned char)expl[i]] = 0;
308
309 init_bit_flag(&bf, (unsigned char)expl[i]);
310 bc->bitmap[bf.offs] |= bf.mask;
311 bc->nbitmap[bf.offs] &= ~bf.mask;
312 }
313 }
314
init_unfolded(char * unf,char c)315 static void init_unfolded(char *unf, char c)
316 {
317 *unf = TOLOWER(c);
318 unf[1] = ((*unf >= 'a') && (*unf <= 'z')) ? *unf - 'a' + 'A' : *unf;
319 }
320
extend_mask(U32 mask)321 static U32 extend_mask(U32 mask)
322 {
323 U32 prev_mask;
324 int i, j;
325
326 /* extra cycle is inefficient but makes superset & subset
327 definitions order-independent */
328 prev_mask = 0;
329 while (mask != prev_mask)
330 {
331 prev_mask = mask;
332 for (i = 0; i < 2; ++i)
333 {
334 for (j = 0; j < SIZEOF_ARRAY(regclass_superset); ++j)
335 {
336 U32 b = regclass_superset[j];
337 U32 s = regclass_subset[j];
338 if (i)
339 {
340 U32 t;
341
342 t = MIRROR_BLOCK(b);
343 b = MIRROR_BLOCK(s);
344 s = t;
345 }
346
347 if (mask & b)
348 {
349 mask |= s;
350 }
351 }
352 }
353 }
354
355 return mask;
356 }
357
convert_desc_to_map(char * desc,int invert,U32 * map)358 static int convert_desc_to_map(char *desc, int invert, U32 *map)
359 {
360 int i;
361 U32 mask = 0;
362 char *p;
363
364 /* fprintf(stderr, "enter convert_desc_to_map(%s, %d\n", desc, invert); */
365
366 p = strstr(desc, "utf8::");
367 /* make sure *(p - 1) is valid */
368 if (p == desc)
369 {
370 rc_error = "no inversion flag before character class description";
371 return -1;
372 }
373
374 while (p)
375 {
376 char sign = *(p - 1);
377 for (i = 0; i < SIZEOF_ARRAY(regclass_names); ++i)
378 {
379 if (!strncmp(p + 6, regclass_names[i], strlen(regclass_names[i])))
380 {
381 if (sign == '+')
382 {
383 if (mask & (regclass_blocks[i] << MIRROR_SHIFT))
384 {
385 *map = invert ? 0 : EVERY_BLOCK;
386 return 1;
387 }
388
389 mask |= regclass_blocks[i];
390 }
391 else if (sign == '!')
392 {
393 if (mask & regclass_blocks[i])
394 {
395 *map = invert ? 0 : EVERY_BLOCK;
396 return 1;
397 }
398
399 mask |= (regclass_blocks[i] << MIRROR_SHIFT);
400 }
401 else
402 {
403 rc_error = "unknown inversion flag before character class description";
404 return -1;
405 }
406 }
407 }
408
409 p = strstr(p + 6, "utf8::");
410 }
411
412 /* fprintf(stderr, "parsed 0x%x\n", (unsigned)mask); */
413
414 if ((mask & ALPHA_BLOCK) && (mask & NUMBER_BLOCK))
415 {
416 mask |= ALNUM_BLOCK;
417 }
418
419 if (invert)
420 {
421 mask = MIRROR_BLOCK(mask);
422 }
423
424 if ((mask & ALPHA_BLOCK) && (mask & NUMBER_BLOCK))
425 {
426 mask |= ALNUM_BLOCK;
427 }
428
429 *map = extend_mask(mask);
430 return 1;
431 }
432
433 /* invlist methods are static inside regcomp.c, so we must copy them... */
get_invlist_offset_addr(SV * invlist)434 static bool *get_invlist_offset_addr(SV *invlist)
435 {
436 return &(((XINVLIST*) SvANY(invlist))->is_offset);
437 }
438
get_invlist_len(SV * invlist)439 static UV get_invlist_len(SV *invlist)
440 {
441 return (SvCUR(invlist) == 0)
442 ? 0
443 : (SvCUR(invlist) / sizeof(UV)) - *get_invlist_offset_addr(invlist);
444 }
445
invlist_array(SV * invlist)446 static UV *invlist_array(SV *invlist)
447 {
448 return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist));
449 }
450
451 /* #define DEBUG_dump_invlist */
452
convert_invlist_to_map(SV * invlist,int invert,U32 * map)453 static int convert_invlist_to_map(SV *invlist, int invert, U32 *map)
454 {
455 /*
456 Not quite what's in charclass_invlists.h - we skip the header
457 as well as all ASCII values.
458 Note that changes to the arrays may require changing the switch
459 below.
460 */
461 static UV perl_space_invlist[] = { 128,
462 #include "XPerlSpace.30"
463 };
464
465 static UV perl_space_short_invlist[] = { 256,
466 #include "XPerlSpace.30a"
467 };
468
469 static UV horizontal_space_invlist[] = { 128, 160, 161, 5760, 5761,
470 6158, 6159, 8192, 8203, 8239, 8240, 8287, 8288, 12288, 12289 };
471
472 static UV vertical_space_invlist[] = { 128, 133, 134, 8232, 8234 };
473
474 static UV xposix_digit_invlist[] = { 128,
475 #include "XPosixDigit.22"
476 };
477
478 static UV xposix_alnum_invlist[] = { 128,
479 #if PERL_API_VERSION == 30
480 #include "XPosixAlnum.30"
481 #else
482 #if PERL_API_VERSION == 32
483 #include "XPosixAlnum.32"
484 #else
485 #if PERL_API_VERSION == 34
486 #include "XPosixAlnum.32"
487 #else
488 #error unexpected PERL_API_VERSION
489 #endif
490 #endif
491 #endif
492 };
493
494 static UV xposix_alpha_invlist[] = { 128,
495 #include "XPosixAlpha.28"
496 };
497
498 static UV xposix_word_invlist[] = { 128,
499 #include "XPosixWord.22"
500 };
501
502 static UV xposix_xdigit_invlist[] = { 128, 65296, 65306, 65313,
503 65319, 65345, 65351 };
504
505 #ifdef DEBUG_dump_invlist
506 U16 i;
507 char div[3];
508 #endif
509
510 UV *ila;
511 UV ill;
512 U32 mask = 0;
513
514 #ifdef DEBUG_dump_invlist
515 fprintf(stderr, "enter convert_invlist_to_map(..., %d, ...)\n", invert);
516 #endif
517
518 ill = get_invlist_len(invlist);
519 #ifdef DEBUG_dump_invlist
520 fprintf(stderr, "ill = %lu\n", ill);
521 #endif
522 ila = ill ? invlist_array(invlist) : 0;
523
524 switch (ill)
525 {
526 case SIZEOF_ARRAY(perl_space_invlist):
527 if (!memcmp(ila, perl_space_invlist, sizeof(perl_space_invlist)))
528 {
529 #ifdef DEBUG_dump_invlist
530 fprintf(stderr, "NOT_SPACE_BLOCK\n");
531 #endif
532 mask = NOT_SPACE_BLOCK;
533 }
534
535 break;
536
537 case SIZEOF_ARRAY(perl_space_invlist) - 1:
538 if (!memcmp(ila, perl_space_invlist + 1,
539 sizeof(perl_space_invlist) - sizeof(perl_space_invlist[0])))
540 {
541 #ifdef DEBUG_dump_invlist
542 fprintf(stderr, "SPACE_BLOCK\n");
543 #endif
544 mask = SPACE_BLOCK;
545 }
546
547 break;
548
549 case SIZEOF_ARRAY(perl_space_short_invlist):
550 if (!memcmp(ila, perl_space_short_invlist, sizeof(perl_space_short_invlist)))
551 {
552 #ifdef DEBUG_dump_invlist
553 fprintf(stderr, "NOT_SPACE_BLOCK\n");
554 #endif
555 mask = NOT_SPACE_BLOCK;
556 }
557
558 break;
559
560 case SIZEOF_ARRAY(horizontal_space_invlist):
561 if (!memcmp(ila, horizontal_space_invlist, sizeof(horizontal_space_invlist)))
562 {
563 #ifdef DEBUG_dump_invlist
564 fprintf(stderr, "NOT_HORIZONTAL_SPACE_BLOCK\n");
565 #endif
566 mask = NOT_HORIZONTAL_SPACE_BLOCK;
567 }
568
569 break;
570
571 case SIZEOF_ARRAY(horizontal_space_invlist) - 1:
572 if (!memcmp(ila, horizontal_space_invlist + 1,
573 sizeof(horizontal_space_invlist) - sizeof(horizontal_space_invlist[0])))
574 {
575 #ifdef DEBUG_dump_invlist
576 fprintf(stderr, "HORIZONTAL_SPACE_BLOCK\n");
577 #endif
578 mask = HORIZONTAL_SPACE_BLOCK;
579 }
580
581 break;
582
583 case SIZEOF_ARRAY(vertical_space_invlist):
584 if (!memcmp(ila, vertical_space_invlist, sizeof(vertical_space_invlist)))
585 {
586 #ifdef DEBUG_dump_invlist
587 fprintf(stderr, "NOT_VERTICAL_SPACE_BLOCK\n");
588 #endif
589 mask = NOT_VERTICAL_SPACE_BLOCK;
590 }
591
592 break;
593
594 case SIZEOF_ARRAY(vertical_space_invlist) - 1:
595 if (!memcmp(ila, vertical_space_invlist + 1,
596 sizeof(vertical_space_invlist) - sizeof(vertical_space_invlist[0])))
597 {
598 #ifdef DEBUG_dump_invlist
599 fprintf(stderr, "VERTICAL_SPACE_BLOCK\n");
600 #endif
601 mask = VERTICAL_SPACE_BLOCK;
602 }
603
604 break;
605
606 case SIZEOF_ARRAY(xposix_digit_invlist):
607 if (!memcmp(ila, xposix_digit_invlist, sizeof(xposix_digit_invlist)))
608 {
609 #ifdef DEBUG_dump_invlist
610 fprintf(stderr, "NOT_NUMBER_BLOCK\n");
611 #endif
612 mask = NOT_NUMBER_BLOCK;
613 }
614
615 break;
616
617 case SIZEOF_ARRAY(xposix_digit_invlist) - 1:
618 if (!memcmp(ila, xposix_digit_invlist + 1,
619 sizeof(xposix_digit_invlist) - sizeof(xposix_digit_invlist[0])))
620 {
621 #ifdef DEBUG_dump_invlist
622 fprintf(stderr, "NUMBER_BLOCK\n");
623 #endif
624 mask = NUMBER_BLOCK;
625 }
626
627 break;
628
629 case SIZEOF_ARRAY(xposix_alnum_invlist):
630 if (!memcmp(ila, xposix_alnum_invlist, sizeof(xposix_alnum_invlist)))
631 {
632 #ifdef DEBUG_dump_invlist
633 fprintf(stderr, "NOT_ALNUM_BLOCK\n");
634 #endif
635 mask = NOT_ALNUM_BLOCK;
636 }
637
638 break;
639
640 case SIZEOF_ARRAY(xposix_alnum_invlist) - 1:
641 if (!memcmp(ila, xposix_alnum_invlist + 1,
642 sizeof(xposix_alnum_invlist) - sizeof(xposix_alnum_invlist[0])))
643 {
644 #ifdef DEBUG_dump_invlist
645 fprintf(stderr, "ALNUM_BLOCK\n");
646 #endif
647 mask = ALNUM_BLOCK;
648 }
649
650 break;
651
652 case SIZEOF_ARRAY(xposix_alpha_invlist):
653 if (!memcmp(ila, xposix_alpha_invlist, sizeof(xposix_alpha_invlist)))
654 {
655 #ifdef DEBUG_dump_invlist
656 fprintf(stderr, "NOT_ALPHA_BLOCK\n");
657 #endif
658 mask = NOT_ALPHA_BLOCK;
659 }
660
661 break;
662
663 case SIZEOF_ARRAY(xposix_alpha_invlist) - 1:
664 if (!memcmp(ila, xposix_alpha_invlist + 1,
665 sizeof(xposix_alpha_invlist) - sizeof(xposix_alpha_invlist[0])))
666 {
667 #ifdef DEBUG_dump_invlist
668 fprintf(stderr, "ALPHA_BLOCK\n");
669 #endif
670 mask = ALPHA_BLOCK;
671 }
672
673 break;
674
675 case SIZEOF_ARRAY(xposix_word_invlist):
676 if (!memcmp(ila, xposix_word_invlist, sizeof(xposix_word_invlist)))
677 {
678 #ifdef DEBUG_dump_invlist
679 fprintf(stderr, "NOT_ALPHA_BLOCK\n");
680 #endif
681 mask = NOT_ALPHA_BLOCK;
682 }
683
684 break;
685
686 case SIZEOF_ARRAY(xposix_word_invlist) - 1:
687 if (!memcmp(ila, xposix_word_invlist + 1,
688 sizeof(xposix_word_invlist) - sizeof(xposix_word_invlist[0])))
689 {
690 #ifdef DEBUG_dump_invlist
691 fprintf(stderr, "ALPHA_BLOCK\n");
692 #endif
693 mask = ALPHA_BLOCK;
694 }
695
696 break;
697
698 case SIZEOF_ARRAY(xposix_xdigit_invlist):
699 if (!memcmp(ila, xposix_xdigit_invlist, sizeof(xposix_xdigit_invlist)))
700 {
701 #ifdef DEBUG_dump_invlist
702 fprintf(stderr, "NOT_NUMBER_BLOCK\n");
703 #endif
704 mask = NOT_NUMBER_BLOCK;
705 }
706
707 break;
708
709 case SIZEOF_ARRAY(xposix_xdigit_invlist) - 1:
710 if (!memcmp(ila, xposix_xdigit_invlist + 1,
711 sizeof(xposix_xdigit_invlist) - sizeof(xposix_xdigit_invlist[0])))
712 {
713 #ifdef DEBUG_dump_invlist
714 fprintf(stderr, "NUMBER_BLOCK\n");
715 #endif
716 mask = NUMBER_BLOCK;
717 }
718
719 break;
720 }
721
722 if (mask)
723 {
724 if (invert)
725 {
726 mask = MIRROR_BLOCK(mask);
727 }
728
729 *map = extend_mask(mask);
730 return 1;
731 }
732
733 #ifdef DEBUG_dump_invlist
734 div[0] = 0;
735 div[2] = 0;
736 for (i = 0; i < ill; ++i)
737 {
738 fprintf(stderr, "%s0x%x", div, (int)(ila[i]));
739 div[0] = ',';
740 div[1] = '\n';
741 }
742
743 fprintf(stderr, "\n");
744 #endif
745
746 return 0;
747 }
748
convert_regclass_map(Arrow * a,U32 * map)749 static int convert_regclass_map(Arrow *a, U32 *map)
750 {
751 regexp_internal *pr;
752 U32 n;
753 struct reg_data *rdata;
754
755 /* fprintf(stderr, "enter convert_regclass_map\n"); */
756
757 assert((a->rn->type == ANYOF) || (a->rn->type == ANYOFD));
758
759 /* basically copied from regexec.c:regclass_swash */
760 n = ARG_LOC(a->rn);
761 pr = RXi_GET(a->origin);
762 if (!pr) /* this should have been tested by find_internal during
763 initialization, but just in case... */
764 {
765 rc_error = "regexp_internal not found";
766 return -1;
767 }
768
769 rdata = pr->data;
770
771 if ((n < rdata->count) &&
772 (rdata->what[n] == 's')) {
773 SV *rv = (SV *)(rdata->data[n]);
774 AV *av = (AV *)SvRV(rv);
775 SV **ary = AvARRAY(av);
776 SV *si = *ary;
777
778 /* from get_regclass_nonbitmap_data of perl 5.30.0: invlist is
779 in ary[0] */
780 if (si) /* not so unconditional in Perl 5.32 */
781 {
782 return convert_invlist_to_map(si,
783 !!(a->rn->flags & ANYOF_INVERT),
784 map);
785 }
786 else
787 {
788 /* fprintf(stderr, "invlist not found\n"); */
789 return 0;
790 }
791 }
792
793 rc_error = "regclass not found";
794 return -1;
795 }
796
797 /* lifted from Perl__get_regclass_nonbitmap_data */
get_invlist_sv(Arrow * a)798 static SV *get_invlist_sv(Arrow *a)
799 {
800 RXi_GET_DECL(a->origin, progi);
801 struct reg_data *data = progi->data;
802
803 assert((a->rn->type == ANYOF) || (a->rn->type == ANYOFD));
804
805 if (data && data->count)
806 {
807 const U32 n = ARG(a->rn);
808
809 if (data->what[n] == 's')
810 {
811 SV * const rv = MUTABLE_SV(data->data[n]);
812 AV * const av = MUTABLE_AV(SvRV(rv));
813 SV **const ary = AvARRAY(av);
814
815 return *ary;
816 }
817 }
818
819 return 0;
820 }
821
822 /* returns 1 OK (map set), 0 map not recognized/representable, -1
823 unexpected input (rc_error set) */
convert_map(Arrow * a,U32 * map)824 static int convert_map(Arrow *a, U32 *map)
825 {
826 /* fprintf(stderr, "enter convert_map\n"); */
827
828 assert((a->rn->type == ANYOF) || (a->rn->type == ANYOFD));
829 assert(map);
830
831 if (ANYOF_FLAGS(a->rn) & (ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP | ANYOF_INVERT))
832 {
833 return convert_regclass_map(a, map);
834 }
835 else
836 {
837 /* fprintf(stderr, "zero map\n"); */
838 *map = 0;
839 return 1;
840 }
841 }
842
843 /* returns 1 OK (map set), 0 map not recognized/representable */
convert_class_narrow(Arrow * a,U32 * map)844 static int convert_class_narrow(Arrow *a, U32 *map)
845 {
846 /* fprintf(stderr, "enter convert_class_narrow\n"); */
847
848 assert(map);
849
850 if (a->rn->flags >= SIZEOF_ARRAY(posix_regclass_blocks))
851 {
852 /* fprintf(stderr, "unknown class %d\n", a->rn->flags); */
853 return 0;
854 }
855
856 U32 mask = posix_regclass_blocks[a->rn->flags];
857 if (!mask)
858 {
859 /* fprintf(stderr, "class %d ignored\n", a->rn->flags); */
860 return 0;
861 }
862
863 *map = mask;
864 return 1;
865 }
866
867 #ifdef RC_ANYOFR
convert_anyofr_to_bitmap(Arrow * a,unsigned char * b)868 static int convert_anyofr_to_bitmap(Arrow *a, unsigned char *b)
869 {
870 regnode *n = a->rn;
871 U32 arg = ARG(n);
872 U32 delta = arg & 0xFFF00000;
873 U32 start = arg & 0xFFFFF;
874 unsigned i;
875 BitFlag bf;
876
877 delta >>= 20;
878 if ((start != FLAGS(n)) || ((start + delta) >= 256)) /* UTF-8 not supported */
879 {
880 /* fprintf(stderr, "needs UTF-8\n"); */
881 return 0;
882 }
883
884 memset(b, 0, ANYOF_BITMAP_SIZE);
885 for (i = 0; i <= delta; i++)
886 {
887 init_bit_flag(&bf, start + i);
888 b[bf.offs] |= bf.mask;
889 }
890
891 return 1;
892 }
893 #endif
894
895 /* Adapted from regcomp.c:get_ANYOFM_contents. b must point to
896 ANYOF_BITMAP_SIZE bytes. Returns 1 OK (b set), 0 matches something
897 above the bitmap. */
convert_anyofm_to_bitmap(Arrow * a,unsigned char * b)898 static int convert_anyofm_to_bitmap(Arrow *a, unsigned char *b)
899 {
900 regnode *n = a->rn;
901 U8 lowest = (U8)ARG(n);
902 unsigned count = 0;
903 unsigned needed = 1U << PL_bitcount[(U8)~FLAGS(n)];
904 unsigned i;
905 BitFlag bf;
906
907 memset(b, 0, ANYOF_BITMAP_SIZE);
908 for (i = lowest; i <= 0xFF; i++)
909 {
910 if ((i & FLAGS(n)) == ARG(n))
911 {
912 init_bit_flag(&bf, i);
913 b[bf.offs] |= bf.mask;
914
915 if (++count >= needed)
916 {
917 return 1;
918 }
919 }
920 }
921
922 return 0;
923 }
924
925 /* returns 1 OK (map set), 0 map not recognized/representable */
convert_class(Arrow * a,U32 * map)926 static int convert_class(Arrow *a, U32 *map)
927 {
928 if (!convert_class_narrow(a, map))
929 {
930 return 0;
931 }
932
933 *map = extend_mask(*map);
934 return 1;
935 }
936
convert_negative_class(Arrow * a,U32 * map)937 static int convert_negative_class(Arrow *a, U32 *map)
938 {
939 U32 mask;
940
941 if (!convert_class_narrow(a, &mask))
942 {
943 return 0;
944 }
945
946 *map = extend_mask(MIRROR_BLOCK(mask));
947 return 1;
948 }
949
get_assertion_offset(regnode * p)950 static int get_assertion_offset(regnode *p)
951 {
952 int offs;
953
954 offs = ARG_LOC(p);
955 if (offs <= 2)
956 {
957 rc_error = "Assertion offset too small";
958 return -1;
959 }
960
961 return offs;
962 }
963
get_synth_offset(regnode * p)964 static int get_synth_offset(regnode *p)
965 {
966 assert(!p->next_off);
967
968 if (((p->type == EXACT) || (p->type == EXACTF) || (p->type == EXACTFU)) &&
969 (p->flags == 1))
970 {
971 return 2;
972 }
973 else if (trivial_nodes[p->type] ||
974 (p->type == REG_ANY) || (p->type == SANY) ||
975 (p->type == POSIXD) || (p->type == NPOSIXD) ||
976 (p->type == POSIXU) || (p->type == NPOSIXU) ||
977 (p->type == POSIXA) || (p->type == NPOSIXA) ||
978 (p->type == LNBREAK))
979 {
980 return 1;
981 }
982 else if ((p->type == ANYOF) || (p->type == ANYOFD))
983 {
984 /* other flags obviously exist, but they haven't been seen yet
985 and it isn't clear what they mean */
986 unsigned int unknown = p->flags & ~(ANYOF_INVERT |
987 ANYOF_MATCHES_ALL_ABOVE_BITMAP | ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP | ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
988 if (unknown)
989 {
990 /* p[10] seems always 0 on Linux, but 0xfbfaf9f8 seen on
991 Windows; for '[\\w\\-_.]+\\.', both 0 and 0x20202020
992 observed in p[11] - wonder what those are... */
993 rc_error = "Unknown bitmap format";
994 return -1;
995 }
996
997 return 10;
998 }
999 #ifdef RC_ANYOFR
1000 else if (p->type == ANYOFR)
1001 {
1002 return 2;
1003 }
1004 #endif
1005 else if (p->type == ANYOFM)
1006 {
1007 return 2;
1008 }
1009 else if (p->type == NANYOFM)
1010 {
1011 return 2;
1012 }
1013 else if ((p->type == IFMATCH) || (p->type == UNLESSM) ||
1014 (p->type == SUSPEND))
1015 {
1016 return get_assertion_offset(p);
1017 }
1018
1019 /* fprintf(stderr, "type %d\n", p->type); */
1020 rc_error = "Offset not set";
1021 return -1;
1022 }
1023
get_size(regnode * rn)1024 static int get_size(regnode *rn)
1025 {
1026 int offs;
1027 regnode *e = rn;
1028
1029 while (e->type != END)
1030 {
1031 offs = GET_OFFSET(e);
1032 if (offs <= 0)
1033 {
1034 return -1;
1035 }
1036
1037 e += offs;
1038 }
1039
1040 return e - rn + 1;
1041 }
1042
1043 /* #define DEBUG_dump_data */
1044
find_internal(regexp * pt)1045 static regnode *find_internal(regexp *pt)
1046 {
1047 regexp_internal *pr;
1048 regnode *p;
1049 #ifdef DEBUG_dump_data
1050 struct reg_data *rdata;
1051 int n;
1052 #endif
1053
1054 assert(pt);
1055
1056 /* ActivePerl Build 1001 doesn't export PL_core_reg_engine, so
1057 the test, however useful, wouldn't link... */
1058 #if !defined(ACTIVEPERL_PRODUCT)
1059 if (pt->engine && (pt->engine != &PL_core_reg_engine))
1060 {
1061 rc_error = "Alternative regexp engine not supported";
1062 return 0;
1063 }
1064 #endif
1065
1066 pr = RXi_GET(pt);
1067 if (!pr)
1068 {
1069 rc_error = "Internal regexp not set";
1070 return 0;
1071 }
1072
1073 p = pr->program;
1074 if (!p)
1075 {
1076 rc_error = "Compiled regexp not set";
1077 return 0;
1078 }
1079
1080 if (!((p->flags == REG_MAGIC) &&
1081 (p->next_off == 0)))
1082 {
1083 /* fprintf(stderr, "%d %d %d\n", p->flags, p->type, p->next_off); */
1084 rc_error = "Invalid regexp signature";
1085 return 0;
1086 }
1087
1088 #ifdef DEBUG_dump_data
1089 rdata = pr->data;
1090 if (rdata)
1091 {
1092 fprintf(stderr, "regexp data count = %d\n", (int)(rdata->count));
1093 for (n = 0; n < rdata->count; ++n)
1094 {
1095 fprintf(stderr, "\twhat[%d] = %c\n", n, rdata->what[n]);
1096 }
1097 }
1098 else
1099 {
1100 fprintf(stderr, "no regexp data\n");
1101 }
1102 #endif
1103
1104 return p + 1;
1105 }
1106
parse_hex_digit(char d)1107 static unsigned char parse_hex_digit(char d)
1108 {
1109 unsigned char rv;
1110
1111 d = tolower(d);
1112
1113 if (('0' <= d) && (d <= '9'))
1114 {
1115 rv = d - '0';
1116 }
1117 else
1118 {
1119 rv = 10 + (d - 'a');
1120 }
1121
1122 return rv;
1123 }
1124
parse_hex_byte(const char * first)1125 static unsigned char parse_hex_byte(const char *first)
1126 {
1127 return 16 * parse_hex_digit(*first) +
1128 parse_hex_digit(first[1]);
1129 }
1130
get_forced_semantics(REGEXP * pt)1131 static unsigned get_forced_semantics(REGEXP *pt)
1132 {
1133 const char *precomp = RX_PRECOMP(pt);
1134 U32 prelen = RX_PRELEN(pt);
1135 int quoted = 0;
1136 int matched;
1137 unsigned forced = 0;
1138 U32 i;
1139 BitFlag bf;
1140 char c;
1141
1142 /* fprintf(stderr, "precomp = %*s\n", (int)prelen, precomp); */
1143
1144 for (i = 0; i < prelen; ++i)
1145 {
1146 c = precomp[i];
1147
1148 if (c == '.')
1149 {
1150 /* a dot does match Unicode character - the problem is
1151 that character might take up multiple bytes, and we
1152 don't want to match just one of them... */
1153 forced |= FORCED_BYTE;
1154 }
1155
1156 if (!quoted)
1157 {
1158 /* technically, the backslash might be in a comment, but
1159 parsing that is too much hassle */
1160 if (c == '\\')
1161 {
1162 quoted = 1;
1163 }
1164 }
1165 else
1166 {
1167 matched = 0;
1168
1169 if (c == 'N')
1170 {
1171 /* we have special cases only for \r & \n... */
1172 if ((i + 8 < prelen) &&
1173 !memcmp(precomp + i + 1, "{U+00", 5) &&
1174 isxdigit(precomp[i + 6]) && isxdigit(precomp[i + 7]) &&
1175 (precomp[i + 8] == '}'))
1176 {
1177 unsigned char x = parse_hex_byte(precomp + i + 6);
1178 if ((x != '\r') && (x != '\n'))
1179 {
1180 forced |= FORCED_CHAR;
1181 }
1182
1183 i += 8;
1184 }
1185 else if ((i + 1 < prelen) &&
1186 (precomp[i + 1] == '{'))
1187 {
1188 forced |= FORCED_CHAR;
1189 }
1190
1191 /* otherwise it's not an escape, but the inverse of \n
1192 - we aren't interested in that */
1193
1194 matched = 1;
1195 }
1196 else if (c == 'x')
1197 {
1198 if ((i + 2 < prelen) &&
1199 isxdigit(precomp[i + 1]) && isxdigit(precomp[i + 2]))
1200 {
1201 unsigned char x = parse_hex_byte(precomp + i + 1);
1202 if ((x != '\r') && (x != '\n'))
1203 {
1204 forced |= FORCED_BYTE;
1205 }
1206
1207 matched = 1;
1208 i += 2;
1209 }
1210 }
1211
1212 /* ...and we aren't bothering to parse octal numbers
1213 and \x{n+} at all... */
1214
1215 if (!matched)
1216 {
1217 init_bit_flag(&bf, (unsigned char)c);
1218 if (forced_byte[bf.offs] & bf.mask)
1219 {
1220 forced |= FORCED_BYTE;
1221 }
1222 }
1223
1224 quoted = 0;
1225 }
1226 }
1227
1228 return forced;
1229 }
1230
alloc_alt(regnode * p,int sz)1231 static regnode *alloc_alt(regnode *p, int sz)
1232 {
1233 regnode *alt;
1234
1235 alt = (regnode *)malloc(sizeof(regnode) * sz);
1236 if (!alt)
1237 {
1238 rc_error = "Could not allocate memory for regexp copy";
1239 return 0;
1240 }
1241
1242 memcpy(alt, p, sizeof(regnode) * sz);
1243
1244 return alt;
1245 }
1246
alloc_terminated(regnode * p,int sz)1247 static regnode *alloc_terminated(regnode *p, int sz)
1248 {
1249 regnode *alt;
1250 int last;
1251
1252 /* fprintf(stderr, "enter alloc_terminated(, %d\n", sz); */
1253
1254 assert(sz > 0);
1255 alt = alloc_alt(p, sz);
1256 if (!alt)
1257 {
1258 return 0;
1259 }
1260
1261 last = alt[sz - 1].type;
1262 /* fprintf(stderr, "type: %d\n", last); */
1263 if ((last >= REGNODE_MAX) || !trivial_nodes[last])
1264 {
1265 rc_error = "Alternative doesn't end like subexpression";
1266 return 0;
1267 }
1268
1269 alt[sz - 1].type = END;
1270 return alt;
1271 }
1272
bump_exact(Arrow * a)1273 static int bump_exact(Arrow *a)
1274 {
1275 int offs;
1276
1277 assert((a->rn->type == EXACT) || (a->rn->type == EXACTF) || (a->rn->type == EXACTFU)
1278 #ifdef RC_EXACT_ONLY8
1279 || (a->rn->type == EXACT_ONLY8)
1280 #endif
1281 #ifdef RC_EXACT_REQ8
1282 || (a->rn->type == EXACT_REQ8)
1283 #endif
1284 );
1285
1286 offs = GET_OFFSET(a->rn);
1287 if (offs <= 0)
1288 {
1289 return -1;
1290 }
1291
1292 #if defined(RC_EXACT_ONLY8) || defined(RC_EXACT_REQ8)
1293 if (a->rn->type ==
1294 #ifdef RC_EXACT_ONLY8
1295 EXACT_ONLY8
1296 #else
1297 EXACT_REQ8
1298 #endif
1299 )
1300 {
1301 while (*(((unsigned char *)((a)->rn + 1)) + (a)->spent) & 0x80)
1302 {
1303 ++(a->spent);
1304 }
1305 }
1306 #endif
1307
1308 if (++(a->spent) >= a->rn->flags)
1309 {
1310 a->spent = 0;
1311 a->rn += offs;
1312 }
1313
1314 return 1;
1315 }
1316
bump_regular(Arrow * a)1317 static int bump_regular(Arrow *a)
1318 {
1319 int offs;
1320
1321 assert(a->rn->type != END);
1322 assert(a->rn->type != EXACT);
1323 assert(a->rn->type != EXACTF);
1324 assert(!a->spent);
1325
1326 offs = GET_OFFSET(a->rn);
1327 if (offs <= 0)
1328 {
1329 return -1;
1330 }
1331
1332 a->rn += offs;
1333 return 1;
1334 }
1335
bump_with_check(Arrow * a)1336 static int bump_with_check(Arrow *a)
1337 {
1338 if (a->rn->type == END)
1339 {
1340 return 0;
1341 }
1342 else if ((a->rn->type == EXACT) || (a->rn->type == EXACTF) || (a->rn->type == EXACTFU)
1343 #ifdef RC_EXACT_ONLY8
1344 || (a->rn->type == EXACT_ONLY8)
1345 #endif
1346 #ifdef RC_EXACT_REQ8
1347 || (a->rn->type == EXACT_REQ8)
1348 #endif
1349 )
1350 {
1351 return bump_exact(a);
1352 }
1353 else
1354 {
1355 return bump_regular(a);
1356 }
1357 }
1358
get_jump_offset(regnode * p)1359 static int get_jump_offset(regnode *p)
1360 {
1361 int offs;
1362 regnode *q;
1363
1364 assert(p->type != END);
1365
1366 offs = GET_OFFSET(p);
1367 if (offs <= 0)
1368 {
1369 return -1;
1370 }
1371
1372 q = p + offs;
1373 while (trivial_nodes[q->type])
1374 {
1375 offs = GET_OFFSET(q);
1376 if (offs <= 0)
1377 {
1378 return -1;
1379 }
1380
1381 q += offs;
1382 }
1383
1384 return q - p;
1385 }
1386
rc_regcomp(SV * rs)1387 REGEXP *rc_regcomp(SV *rs)
1388 {
1389 REGEXP *rx;
1390
1391 if (!rs)
1392 {
1393 croak("No regexp to compare");
1394 }
1395
1396 rx = pregcomp(rs, 0);
1397 if (!rx)
1398 {
1399 croak("Cannot compile regexp");
1400 }
1401
1402 return rx;
1403 }
1404
rc_regfree(REGEXP * rx)1405 void rc_regfree(REGEXP *rx)
1406 {
1407 if (rx)
1408 {
1409 pregfree(rx);
1410 }
1411 }
1412
compare_mismatch(int anchored,Arrow * a1,Arrow * a2)1413 static int compare_mismatch(int anchored, Arrow *a1, Arrow *a2)
1414 {
1415 int rv;
1416
1417 /* fprintf(stderr, "enter compare_mismatch(%d...)\n", anchored); */
1418
1419 if (anchored)
1420 {
1421 return 0;
1422 }
1423 else
1424 {
1425 rv = bump_with_check(a1);
1426 if (rv <= 0)
1427 {
1428 return rv;
1429 }
1430
1431 return compare(0, a1, a2);
1432 }
1433 }
1434
compare_tails(int anchored,Arrow * a1,Arrow * a2)1435 static int compare_tails(int anchored, Arrow *a1, Arrow *a2)
1436 {
1437 Arrow tail1, tail2;
1438 int rv;
1439
1440 /* is it worth using StructCopy? */
1441 tail1 = *a1;
1442 rv = bump_with_check(&tail1);
1443 if (rv <= 0)
1444 {
1445 return rv;
1446 }
1447
1448 tail2 = *a2;
1449 rv = bump_with_check(&tail2);
1450 if (rv <= 0)
1451 {
1452 return rv;
1453 }
1454
1455 rv = compare(1, &tail1, &tail2);
1456 if (rv < 0)
1457 {
1458 return rv;
1459 }
1460
1461 if (!rv)
1462 {
1463 rv = compare_mismatch(anchored, a1, a2);
1464 }
1465 else
1466 {
1467 *a1 = tail1;
1468 *a2 = tail2;
1469 }
1470
1471 return rv;
1472 }
1473
compare_left_tail(int anchored,Arrow * a1,Arrow * a2)1474 static int compare_left_tail(int anchored, Arrow *a1, Arrow *a2)
1475 {
1476 Arrow tail1;
1477 int rv;
1478
1479 tail1 = *a1;
1480 rv = bump_with_check(&tail1);
1481 if (rv <= 0)
1482 {
1483 return rv;
1484 }
1485
1486 return compare(anchored, &tail1, a2);
1487 }
1488
compare_after_assertion(int anchored,Arrow * a1,Arrow * a2)1489 static int compare_after_assertion(int anchored, Arrow *a1, Arrow *a2)
1490 {
1491 Arrow tail1;
1492 int offs;
1493
1494 assert((a1->rn->type == IFMATCH) || (a1->rn->type == UNLESSM));
1495
1496 offs = get_assertion_offset(a1->rn);
1497 if (offs < 0)
1498 {
1499 return offs;
1500 }
1501
1502 tail1.origin = a1->origin;
1503 tail1.rn = a1->rn + offs;
1504 tail1.spent = 0;
1505 return compare(anchored, &tail1, a2);
1506 }
1507
compare_positive_assertions(int anchored,Arrow * a1,Arrow * a2)1508 static int compare_positive_assertions(int anchored, Arrow *a1, Arrow *a2)
1509 {
1510 regnode *p1, *alt1, *p2, *alt2;
1511 int rv, sz1, sz2;
1512 Arrow left, right;
1513
1514 p1 = a1->rn;
1515 p2 = a2->rn;
1516 assert(p1->type == IFMATCH);
1517 assert(p2->type == IFMATCH);
1518
1519 sz1 = get_assertion_offset(p1);
1520 if (sz1 < 0)
1521 {
1522 return -1;
1523 }
1524
1525 sz2 = get_assertion_offset(p2);
1526 if (sz2 < 0)
1527 {
1528 return -1;
1529 }
1530
1531 alt1 = alloc_terminated(p1 + 2, sz1 - 2);
1532 if (!alt1)
1533 {
1534 return -1;
1535 }
1536
1537 alt2 = alloc_terminated(p2 + 2, sz2 - 2);
1538 if (!alt2)
1539 {
1540 free(alt1);
1541 return -1;
1542 }
1543
1544 left.origin = a1->origin;
1545 left.rn = alt1;
1546 left.spent = 0;
1547 right.origin = a2->origin;
1548 right.rn = alt2;
1549 right.spent = 0;
1550 rv = compare(0, &left, &right);
1551
1552 free(alt1);
1553 free(alt2);
1554
1555 if (rv <= 0)
1556 {
1557 return rv;
1558 }
1559
1560 /* left & right.origin stays a1 & a2->origin, respectively */
1561 left.rn = p1 + sz1;
1562 left.spent = 0;
1563 right.rn = p2 + sz2;
1564 right.spent = 0;
1565 return compare(anchored, &left, &right);
1566 }
1567
compare_negative_assertions(int anchored,Arrow * a1,Arrow * a2)1568 static int compare_negative_assertions(int anchored, Arrow *a1, Arrow *a2)
1569 {
1570 regnode *p1, *alt1, *p2, *alt2;
1571 int rv, sz1, sz2;
1572 Arrow left, right;
1573
1574 p1 = a1->rn;
1575 p2 = a2->rn;
1576 assert(p1->type == UNLESSM);
1577 assert(p2->type == UNLESSM);
1578
1579 sz1 = get_assertion_offset(p1);
1580 if (sz1 < 0)
1581 {
1582 return -1;
1583 }
1584
1585 sz2 = get_assertion_offset(p2);
1586 if (sz2 < 0)
1587 {
1588 return -1;
1589 }
1590
1591 alt1 = alloc_terminated(p1 + 2, sz1 - 2);
1592 if (!alt1)
1593 {
1594 return -1;
1595 }
1596
1597 alt2 = alloc_terminated(p2 + 2, sz2 - 2);
1598 if (!alt2)
1599 {
1600 free(alt1);
1601 return -1;
1602 }
1603
1604 left.origin = a1->origin;
1605 left.rn = alt1;
1606 left.spent = 0;
1607 right.origin = a2->origin;
1608 right.rn = alt2;
1609 right.spent = 0;
1610 rv = compare(0, &right, &left);
1611
1612 free(alt1);
1613 free(alt2);
1614
1615 if (rv <= 0)
1616 {
1617 return rv;
1618 }
1619
1620 /* left & right.origin stays a1 & a2->origin, respectively */
1621 left.rn = p1 + sz1;
1622 left.spent = 0;
1623 right.rn = p2 + sz2;
1624 right.spent = 0;
1625 return compare(anchored, &left, &right);
1626 }
1627
compare_subexpressions(int anchored,Arrow * a1,Arrow * a2)1628 static int compare_subexpressions(int anchored, Arrow *a1, Arrow *a2)
1629 {
1630 regnode *p1, *alt1, *p2, *alt2;
1631 int rv, sz1, sz2;
1632 Arrow left, right;
1633
1634 p1 = a1->rn;
1635 p2 = a2->rn;
1636 assert(p1->type == SUSPEND);
1637 assert(p2->type == SUSPEND);
1638
1639 sz1 = get_assertion_offset(p1);
1640 if (sz1 < 0)
1641 {
1642 return -1;
1643 }
1644
1645 sz2 = get_assertion_offset(p2);
1646 if (sz2 < 0)
1647 {
1648 return -1;
1649 }
1650
1651 alt1 = alloc_terminated(p1 + 2, sz1 - 2);
1652 if (!alt1)
1653 {
1654 return -1;
1655 }
1656
1657 alt2 = alloc_terminated(p2 + 2, sz2 - 2);
1658 if (!alt2)
1659 {
1660 free(alt1);
1661 return -1;
1662 }
1663
1664 left.origin = a1->origin;
1665 left.rn = alt1;
1666 left.spent = 0;
1667 right.origin = a2->origin;
1668 right.rn = alt2;
1669 right.spent = 0;
1670 rv = compare(1, &left, &right);
1671
1672 free(alt1);
1673 free(alt2);
1674
1675 if (rv <= 0)
1676 {
1677 return rv;
1678 }
1679
1680 /* left & right.origin stays a1 & a2->origin, respectively */
1681 left.rn = p1 + sz1;
1682 left.spent = 0;
1683 right.rn = p2 + sz2;
1684 right.spent = 0;
1685 return compare(1, &left, &right);
1686 }
1687
compare_bol(int anchored,Arrow * a1,Arrow * a2)1688 static int compare_bol(int anchored, Arrow *a1, Arrow *a2)
1689 {
1690 int rv;
1691
1692 assert((a1->rn->type == MBOL) || (a1->rn->type == SBOL));
1693
1694 if (anchored)
1695 {
1696 return 0;
1697 }
1698
1699 if (bump_regular(a1) <= 0)
1700 {
1701 return -1;
1702 }
1703
1704 rv = compare(1, a1, a2);
1705 if (!rv)
1706 {
1707 rv = compare_mismatch(0, a1, a2);
1708 }
1709
1710 return rv;
1711 }
1712
get_bitmap_byte(regnode * p,int i)1713 static unsigned char get_bitmap_byte(regnode *p, int i)
1714 {
1715 unsigned char *bitmap;
1716 unsigned char loc;
1717
1718 assert((p->type == ANYOF) || (p->type == ANYOFD));
1719
1720 bitmap = (unsigned char *)(p + 2);
1721 if ((i >= 16) && (p->type == ANYOFD) &&
1722 (p->flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
1723 {
1724 loc = 0xff;
1725 }
1726 else
1727 {
1728 loc = bitmap[i];
1729 }
1730
1731 if (p->flags & ANYOF_INVERT)
1732 {
1733 loc = ~loc;
1734 }
1735
1736 return loc;
1737 }
1738
compare_bitmaps(int anchored,Arrow * a1,Arrow * a2,unsigned char * b1,unsigned char * b2)1739 static int compare_bitmaps(int anchored, Arrow *a1, Arrow *a2,
1740 unsigned char *b1, unsigned char *b2)
1741 {
1742 /* Note that aN->flags must be ignored when bN is set (necessary
1743 for ANYOFM, where they aren't really flags and can't be used as
1744 such). */
1745 unsigned char loc1, loc2;
1746 int i;
1747
1748 /* fprintf(stderr, "enter compare_bitmaps(%d, %d, %d)\n", anchored,
1749 a1->rn->type, a2->rn->type); */
1750
1751 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
1752 {
1753 loc1 = b1 ? b1[i] : get_bitmap_byte(a1->rn, i);
1754 loc2 = b2 ? b2[i] : get_bitmap_byte(a2->rn, i);
1755 if (loc1 & ~loc2)
1756 {
1757 /* fprintf(stderr, "compare_bitmaps fails at %d: %d does not imply %d\n",
1758 i, loc1, loc2); */
1759 return compare_mismatch(anchored, a1, a2);
1760 }
1761 }
1762
1763 return compare_tails(anchored, a1, a2);
1764 }
1765
compare_negative_bitmaps(int anchored,Arrow * a1,Arrow * a2,unsigned char * b1,unsigned char * b2)1766 static int compare_negative_bitmaps(int anchored, Arrow *a1, Arrow *a2,
1767 unsigned char *b1, unsigned char *b2)
1768 {
1769 unsigned char loc1, loc2;
1770 int i;
1771
1772 assert(b1 && b2);
1773 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
1774 {
1775 loc1 = b1[i];
1776 loc2 = b2[i];
1777 if (~loc1 & loc2)
1778 {
1779 return compare_mismatch(anchored, a1, a2);
1780 }
1781 }
1782
1783 return compare_tails(anchored, a1, a2);
1784 }
1785
compare_anyof_multiline(int anchored,Arrow * a1,Arrow * a2)1786 static int compare_anyof_multiline(int anchored, Arrow *a1, Arrow *a2)
1787 {
1788 BitFlag bf;
1789 Arrow tail1, tail2;
1790 unsigned char req;
1791 int i;
1792
1793 /* fprintf(stderr, "enter compare_anyof_multiline\n"); */
1794
1795 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
1796 assert((a2->rn->type == MBOL) || (a2->rn->type == MEOL));
1797
1798 if (a1->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
1799 {
1800 return compare_mismatch(anchored, a1, a2);
1801 }
1802
1803 init_bit_flag(&bf, '\n');
1804 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
1805 {
1806 req = (i != bf.offs) ? 0 : bf.mask;
1807 if (get_bitmap_byte(a1->rn, i) != req)
1808 {
1809 return compare_mismatch(anchored, a1, a2);
1810 }
1811 }
1812
1813 tail1 = *a1;
1814 if (bump_regular(&tail1) <= 0)
1815 {
1816 return -1;
1817 }
1818
1819 tail2 = *a2;
1820 if (bump_regular(&tail2) <= 0)
1821 {
1822 return -1;
1823 }
1824
1825 return compare(1, &tail1, &tail2);
1826 }
1827
1828 #ifdef RC_ANYOFR
compare_anyofr_multiline(int anchored,Arrow * a1,Arrow * a2)1829 static int compare_anyofr_multiline(int anchored, Arrow *a1, Arrow *a2)
1830 {
1831 unsigned char req;
1832 int i;
1833 BitFlag bf;
1834 Arrow tail1, tail2;
1835 unsigned char left[ANYOF_BITMAP_SIZE];
1836
1837 assert(a1->rn->type == ANYOFR);
1838 assert((a2->rn->type == MBOL) || (a2->rn->type == MEOL));
1839
1840 if (!convert_anyofr_to_bitmap(a1, left))
1841 {
1842 return compare_mismatch(anchored, a1, a2);
1843 }
1844
1845 init_bit_flag(&bf, '\n');
1846 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
1847 {
1848 req = (i != bf.offs) ? 0 : bf.mask;
1849 if (left[i] != req)
1850 {
1851 return compare_mismatch(anchored, a1, a2);
1852 }
1853 }
1854
1855 tail1 = *a1;
1856 if (bump_regular(&tail1) <= 0)
1857 {
1858 return -1;
1859 }
1860
1861 tail2 = *a2;
1862 if (bump_regular(&tail2) <= 0)
1863 {
1864 return -1;
1865 }
1866
1867 return compare(1, &tail1, &tail2);
1868 }
1869 #endif
1870
compare_anyofm_multiline(int anchored,Arrow * a1,Arrow * a2)1871 static int compare_anyofm_multiline(int anchored, Arrow *a1, Arrow *a2)
1872 {
1873 unsigned char req;
1874 int i;
1875 BitFlag bf;
1876 Arrow tail1, tail2;
1877 unsigned char left[ANYOF_BITMAP_SIZE];
1878
1879 assert(a1->rn->type == ANYOFM);
1880 assert((a2->rn->type == MBOL) || (a2->rn->type == MEOL));
1881
1882 if (!convert_anyofm_to_bitmap(a1, left))
1883 {
1884 return compare_mismatch(anchored, a1, a2);
1885 }
1886
1887 init_bit_flag(&bf, '\n');
1888 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
1889 {
1890 req = (i != bf.offs) ? 0 : bf.mask;
1891 if (left[i] != req)
1892 {
1893 return compare_mismatch(anchored, a1, a2);
1894 }
1895 }
1896
1897 tail1 = *a1;
1898 if (bump_regular(&tail1) <= 0)
1899 {
1900 return -1;
1901 }
1902
1903 tail2 = *a2;
1904 if (bump_regular(&tail2) <= 0)
1905 {
1906 return -1;
1907 }
1908
1909 return compare(1, &tail1, &tail2);
1910 }
1911
compare_nanyofm_multiline(int anchored,Arrow * a1,Arrow * a2)1912 static int compare_nanyofm_multiline(int anchored, Arrow *a1, Arrow *a2)
1913 {
1914 unsigned char req;
1915 int i;
1916 BitFlag bf;
1917 Arrow tail1, tail2;
1918 unsigned char left[ANYOF_BITMAP_SIZE];
1919
1920 assert(a1->rn->type == NANYOFM);
1921 assert((a2->rn->type == MBOL) || (a2->rn->type == MEOL));
1922
1923 if (!convert_anyofm_to_bitmap(a1, left))
1924 {
1925 return compare_mismatch(anchored, a1, a2);
1926 }
1927
1928 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
1929 {
1930 left[i] = ~left[i];
1931 }
1932
1933 init_bit_flag(&bf, '\n');
1934 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
1935 {
1936 req = (i != bf.offs) ? 0 : bf.mask;
1937 if (left[i] != req)
1938 {
1939 return compare_mismatch(anchored, a1, a2);
1940 }
1941 }
1942
1943 tail1 = *a1;
1944 if (bump_regular(&tail1) <= 0)
1945 {
1946 return -1;
1947 }
1948
1949 tail2 = *a2;
1950 if (bump_regular(&tail2) <= 0)
1951 {
1952 return -1;
1953 }
1954
1955 return compare(1, &tail1, &tail2);
1956 }
1957
compare_anyof_anyof(int anchored,Arrow * a1,Arrow * a2)1958 static int compare_anyof_anyof(int anchored, Arrow *a1, Arrow *a2)
1959 {
1960 int extra_left;
1961
1962 /* fprintf(stderr, "enter compare_anyof_anyof(%d\n", anchored); */
1963
1964 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
1965 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
1966
1967 extra_left = ANYOF_FLAGS(a1->rn) &
1968 ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
1969 if ((extra_left || (a1->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)) &&
1970 !(a2->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP))
1971 {
1972 U32 m1, m2;
1973 int cr1, cr2;
1974
1975 /* fprintf(stderr, "comparing invlists: left flags = 0x%x, right flags = 0x%x\n", (int)(a1->rn->flags), (int)(a2->rn->flags)); */
1976 /* before recognizing standard invlists, check whether they
1977 aren't the same - this duplicates the code to get to the
1978 invlist, but works even for non-standard ones,
1979 e.g. [\da] */
1980 if ((a1->rn->flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP) &&
1981 (a2->rn->flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP))
1982 {
1983 SV *invlist1 = get_invlist_sv(a1);
1984 SV *invlist2 = get_invlist_sv(a2);
1985 if (invlist1 && invlist2)
1986 {
1987 UV ill1 = get_invlist_len(invlist1);
1988 UV ill2 = get_invlist_len(invlist2);
1989 if (ill1 && (ill1 == ill2))
1990 {
1991 UV *ila1 = invlist_array(invlist1);
1992 UV *ila2 = invlist_array(invlist2);
1993 if (!memcmp(ila1, ila2, ill1 * sizeof(UV)))
1994 {
1995 return compare_bitmaps(anchored, a1, a2, 0, 0);
1996 }
1997 }
1998 }
1999 }
2000
2001 cr1 = convert_map(a1, &m1);
2002 if (cr1 == -1)
2003 {
2004 return -1;
2005 }
2006
2007 cr2 = convert_map(a2, &m2);
2008 if (cr2 == -1)
2009 {
2010 return -1;
2011 }
2012
2013 /* clearly this hould happen at a lower level, but there it
2014 breaks other paths... */
2015 if (m2 & NOT_ALNUM_BLOCK)
2016 {
2017 m2 |= NOT_ALPHA_BLOCK | NOT_NUMBER_BLOCK;
2018 m2 = extend_mask(m2);
2019 }
2020
2021 if (!cr1 || !cr2 || (m1 & ~m2))
2022 {
2023 /* fprintf(stderr, "cr1 = %d, cr2 = %d, m1 = 0x%x, m2 = 0x%x\n",
2024 cr1, cr2, (unsigned)m1, (unsigned)m2); */
2025 return compare_mismatch(anchored, a1, a2);
2026 }
2027 }
2028
2029 return compare_bitmaps(anchored, a1, a2, 0, 0);
2030 }
2031
2032 #ifdef RC_ANYOFR
compare_anyof_anyofr(int anchored,Arrow * a1,Arrow * a2)2033 static int compare_anyof_anyofr(int anchored, Arrow *a1, Arrow *a2)
2034 {
2035 unsigned char right[ANYOF_BITMAP_SIZE];
2036
2037 /* fprintf(stderr, "enter compare_anyof_anyofr(%d\n", anchored); */
2038
2039 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
2040 assert(a2->rn->type == ANYOFR);
2041
2042 if (ANYOF_FLAGS(a1->rn) & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)
2043 {
2044 return compare_mismatch(anchored, a1, a2);
2045 }
2046
2047 if (!convert_anyofr_to_bitmap(a2, right))
2048 {
2049 return compare_mismatch(anchored, a1, a2);
2050 }
2051
2052 return compare_bitmaps(anchored, a1, a2, 0, right);
2053 }
2054
compare_anyofr_anyofm(int anchored,Arrow * a1,Arrow * a2)2055 static int compare_anyofr_anyofm(int anchored, Arrow *a1, Arrow *a2)
2056 {
2057 unsigned char left[ANYOF_BITMAP_SIZE];
2058 unsigned char right[ANYOF_BITMAP_SIZE];
2059
2060 assert(a1->rn->type == ANYOFR);
2061 assert(a2->rn->type == ANYOFM);
2062
2063 if (!convert_anyofr_to_bitmap(a1, left))
2064 {
2065 return compare_mismatch(anchored, a1, a2);
2066 }
2067
2068 if (!convert_anyofm_to_bitmap(a2, right))
2069 {
2070 return compare_mismatch(anchored, a1, a2);
2071 }
2072
2073 return compare_bitmaps(anchored, a1, a2, left, right);
2074 }
2075
compare_anyofr_anyofr(int anchored,Arrow * a1,Arrow * a2)2076 static int compare_anyofr_anyofr(int anchored, Arrow *a1, Arrow *a2)
2077 {
2078 unsigned char left[ANYOF_BITMAP_SIZE];
2079 unsigned char right[ANYOF_BITMAP_SIZE];
2080
2081 assert(a1->rn->type == ANYOFR);
2082 assert(a2->rn->type == ANYOFR);
2083
2084 if (!convert_anyofr_to_bitmap(a1, left))
2085 {
2086 return compare_mismatch(anchored, a1, a2);
2087 }
2088
2089 if (!convert_anyofr_to_bitmap(a2, right))
2090 {
2091 return compare_mismatch(anchored, a1, a2);
2092 }
2093
2094 return compare_bitmaps(anchored, a1, a2, left, right);
2095 }
2096
compare_anyofm_anyofr(int anchored,Arrow * a1,Arrow * a2)2097 static int compare_anyofm_anyofr(int anchored, Arrow *a1, Arrow *a2)
2098 {
2099 unsigned char left[ANYOF_BITMAP_SIZE];
2100 unsigned char right[ANYOF_BITMAP_SIZE];
2101
2102 assert(a1->rn->type == ANYOFM);
2103 assert(a2->rn->type == ANYOFR);
2104
2105 if (!convert_anyofm_to_bitmap(a1, left))
2106 {
2107 return compare_mismatch(anchored, a1, a2);
2108 }
2109
2110 if (!convert_anyofr_to_bitmap(a2, right))
2111 {
2112 return compare_mismatch(anchored, a1, a2);
2113 }
2114
2115 return compare_bitmaps(anchored, a1, a2, left, right);
2116 }
2117
compare_anyofr_anyof(int anchored,Arrow * a1,Arrow * a2)2118 static int compare_anyofr_anyof(int anchored, Arrow *a1, Arrow *a2)
2119 {
2120 unsigned char left[ANYOF_BITMAP_SIZE];
2121
2122 /* fprintf(stderr, "enter compare_anyofr_anyof(%d\n", anchored); */
2123
2124 assert(a1->rn->type == ANYOFR);
2125 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
2126
2127 if (!convert_anyofr_to_bitmap(a1, left))
2128 {
2129 return compare_mismatch(anchored, a1, a2);
2130 }
2131
2132 return compare_bitmaps(anchored, a1, a2, left, 0);
2133 }
2134 #endif
2135
compare_anyof_anyofm(int anchored,Arrow * a1,Arrow * a2)2136 static int compare_anyof_anyofm(int anchored, Arrow *a1, Arrow *a2)
2137 {
2138 unsigned char right[ANYOF_BITMAP_SIZE];
2139
2140 /* fprintf(stderr, "enter compare_anyof_anyofm(%d\n", anchored); */
2141
2142 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
2143 assert(a2->rn->type == ANYOFM);
2144
2145 if (ANYOF_FLAGS(a1->rn) & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)
2146 {
2147 return compare_mismatch(anchored, a1, a2);
2148 }
2149
2150 if (!convert_anyofm_to_bitmap(a2, right))
2151 {
2152 return compare_mismatch(anchored, a1, a2);
2153 }
2154
2155 return compare_bitmaps(anchored, a1, a2, 0, right);
2156 }
2157
compare_anyofm_anyof(int anchored,Arrow * a1,Arrow * a2)2158 static int compare_anyofm_anyof(int anchored, Arrow *a1, Arrow *a2)
2159 {
2160 unsigned char left[ANYOF_BITMAP_SIZE];
2161
2162 /* fprintf(stderr, "enter compare_anyofm_anyof(%d\n", anchored); */
2163
2164 assert(a1->rn->type == ANYOFM);
2165 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
2166
2167 if (!convert_anyofm_to_bitmap(a1, left))
2168 {
2169 return compare_mismatch(anchored, a1, a2);
2170 }
2171
2172 return compare_bitmaps(anchored, a1, a2, left, 0);
2173 }
2174
compare_anyofm_anyofm(int anchored,Arrow * a1,Arrow * a2)2175 static int compare_anyofm_anyofm(int anchored, Arrow *a1, Arrow *a2)
2176 {
2177 unsigned char left[ANYOF_BITMAP_SIZE];
2178 unsigned char right[ANYOF_BITMAP_SIZE];
2179
2180 assert(a1->rn->type == ANYOFM);
2181 assert(a2->rn->type == ANYOFM);
2182
2183 if (!convert_anyofm_to_bitmap(a1, left))
2184 {
2185 return compare_mismatch(anchored, a1, a2);
2186 }
2187
2188 if (!convert_anyofm_to_bitmap(a2, right))
2189 {
2190 return compare_mismatch(anchored, a1, a2);
2191 }
2192
2193 return compare_bitmaps(anchored, a1, a2, left, right);
2194 }
2195
compare_anyof_nanyofm(int anchored,Arrow * a1,Arrow * a2)2196 static int compare_anyof_nanyofm(int anchored, Arrow *a1, Arrow *a2)
2197 {
2198 int i;
2199 unsigned char right[ANYOF_BITMAP_SIZE];
2200
2201 /* fprintf(stderr, "enter compare_anyof_nanyofn(%d\n", anchored); */
2202
2203 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
2204 assert(a2->rn->type == NANYOFM);
2205
2206 /* fprintf(stderr, "left flags = 0x%x\n", a1->rn->flags); */
2207
2208 if ((a1->rn->flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP) ||
2209 ((a1->rn->flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER) &&
2210 !(a1->rn->flags & ANYOF_INVERT)))
2211 {
2212 return compare_mismatch(anchored, a1, a2);
2213 }
2214
2215 if (!convert_anyofm_to_bitmap(a2, right))
2216 {
2217 return compare_mismatch(anchored, a1, a2);
2218 }
2219
2220 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
2221 {
2222 right[i] = ~right[i];
2223 }
2224
2225 return compare_bitmaps(anchored, a1, a2, 0, right);
2226 }
2227
2228 #ifdef RC_ANYOFR
compare_anyofr_nanyofm(int anchored,Arrow * a1,Arrow * a2)2229 static int compare_anyofr_nanyofm(int anchored, Arrow *a1, Arrow *a2)
2230 {
2231 int i;
2232 unsigned char left[ANYOF_BITMAP_SIZE];
2233 unsigned char right[ANYOF_BITMAP_SIZE];
2234
2235 assert(a1->rn->type == ANYOFR);
2236 assert(a2->rn->type == NANYOFM);
2237
2238 if (!convert_anyofr_to_bitmap(a1, left))
2239 {
2240 return compare_mismatch(anchored, a1, a2);
2241 }
2242
2243 if (!convert_anyofm_to_bitmap(a2, right))
2244 {
2245 return compare_mismatch(anchored, a1, a2);
2246 }
2247
2248 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
2249 {
2250 right[i] = ~right[i];
2251 }
2252
2253 return compare_bitmaps(anchored, a1, a2, left, right);
2254 }
2255 #endif
2256
compare_anyofm_nanyofm(int anchored,Arrow * a1,Arrow * a2)2257 static int compare_anyofm_nanyofm(int anchored, Arrow *a1, Arrow *a2)
2258 {
2259 int i;
2260 unsigned char left[ANYOF_BITMAP_SIZE];
2261 unsigned char right[ANYOF_BITMAP_SIZE];
2262
2263 assert(a1->rn->type == ANYOFM);
2264 assert(a2->rn->type == NANYOFM);
2265
2266 if (!convert_anyofm_to_bitmap(a1, left))
2267 {
2268 return compare_mismatch(anchored, a1, a2);
2269 }
2270
2271 if (!convert_anyofm_to_bitmap(a2, right))
2272 {
2273 return compare_mismatch(anchored, a1, a2);
2274 }
2275
2276 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
2277 {
2278 right[i] = ~right[i];
2279 }
2280
2281 return compare_bitmaps(anchored, a1, a2, left, right);
2282 }
2283
compare_nanyofm_nanyofm(int anchored,Arrow * a1,Arrow * a2)2284 static int compare_nanyofm_nanyofm(int anchored, Arrow *a1, Arrow *a2)
2285 {
2286 unsigned char left[ANYOF_BITMAP_SIZE];
2287 unsigned char right[ANYOF_BITMAP_SIZE];
2288
2289 assert(a1->rn->type == NANYOFM);
2290 assert(a2->rn->type == NANYOFM);
2291
2292 if (!convert_anyofm_to_bitmap(a1, left))
2293 {
2294 return compare_mismatch(anchored, a1, a2);
2295 }
2296
2297 if (!convert_anyofm_to_bitmap(a2, right))
2298 {
2299 return compare_mismatch(anchored, a1, a2);
2300 }
2301
2302 return compare_negative_bitmaps(anchored, a1, a2, left, right);
2303 }
2304
2305 /* compare_bitmaps could replace this method, but when a class
2306 contains just a few characters, it seems more natural to compare
2307 them explicitly */
compare_short_byte_class(int anchored,Arrow * a1,Arrow * a2,ByteClass * left)2308 static int compare_short_byte_class(int anchored, Arrow *a1, Arrow *a2,
2309 ByteClass *left)
2310 {
2311 BitFlag bf;
2312 int i;
2313
2314 for (i = 0; i < left->expl_size; ++i)
2315 {
2316 init_bit_flag(&bf, (unsigned char)left->expl[i]);
2317 if (!(get_bitmap_byte(a2->rn, bf.offs) & bf.mask))
2318 {
2319 return compare_mismatch(anchored, a1, a2);
2320 }
2321 }
2322
2323 return compare_tails(anchored, a1, a2);
2324 }
2325
compare_right_full(int anchored,Arrow * a1,Arrow * a2)2326 static int compare_right_full(int anchored, Arrow *a1, Arrow *a2)
2327 {
2328 int i;
2329
2330 for (i = 0; i < 16; ++i)
2331 {
2332 if (!(get_bitmap_byte(a2->rn, i) & 0xff))
2333 {
2334 return compare_mismatch(anchored, a1, a2);
2335 }
2336 }
2337
2338 return compare_tails(anchored, a1, a2);
2339 }
2340
compare_posix_posix(int anchored,Arrow * a1,Arrow * a2)2341 static int compare_posix_posix(int anchored, Arrow *a1, Arrow *a2)
2342 {
2343 U32 m1, m2;
2344 int cr1, cr2;
2345
2346 /* fprintf(stderr, "enter compare_posix_posix\n"); */
2347
2348 cr1 = convert_class(a1, &m1);
2349 cr2 = convert_class(a2, &m2);
2350 if (!cr1 || !cr2 || (m1 & ~m2))
2351 {
2352 return compare_mismatch(anchored, a1, a2);
2353 }
2354
2355 return compare_tails(anchored, a1, a2);
2356 }
2357
compare_posix_negative_posix(int anchored,Arrow * a1,Arrow * a2)2358 static int compare_posix_negative_posix(int anchored, Arrow *a1, Arrow *a2)
2359 {
2360 U32 m1, m2;
2361 int cr1, cr2;
2362
2363 /* fprintf(stderr, "enter compare_posix_negative_posix\n"); */
2364
2365 cr1 = convert_class(a1, &m1);
2366 cr2 = convert_class(a2, &m2);
2367 if (!cr1 || !cr2)
2368 {
2369 return compare_mismatch(anchored, a1, a2);
2370 }
2371
2372 /* vertical space is not a strict subset of space, but it does
2373 have space elements, so we have to require space on the right */
2374 if ((m1 & VERTICAL_SPACE_BLOCK) && !(m2 & VERTICAL_SPACE_BLOCK))
2375 {
2376 m1 |= SPACE_BLOCK;
2377 }
2378
2379 if (m1 & m2)
2380 {
2381 return compare_mismatch(anchored, a1, a2);
2382 }
2383
2384 return compare_tails(anchored, a1, a2);
2385 }
2386
compare_negative_posix_negative_posix(int anchored,Arrow * a1,Arrow * a2)2387 static int compare_negative_posix_negative_posix(int anchored, Arrow *a1, Arrow *a2)
2388 {
2389 U32 m1, m2;
2390 int cr1, cr2;
2391
2392 assert((a1->rn->type == NPOSIXD) || (a1->rn->type == NPOSIXU) ||
2393 (a1->rn->type == NPOSIXA));
2394 assert((a2->rn->type == NPOSIXD) || (a2->rn->type == NPOSIXU) ||
2395 (a2->rn->type == NPOSIXA));
2396
2397 /* fprintf(stderr, "enter compare_negative_posix_negative_posix\n"); */
2398
2399 cr1 = convert_negative_class(a1, &m1);
2400 cr2 = convert_negative_class(a2, &m2);
2401 if (!cr2 || !cr2 || (m1 & ~m2))
2402 {
2403 return compare_mismatch(anchored, a1, a2);
2404 }
2405
2406 return compare_tails(anchored, a1, a2);
2407 }
2408
compare_exact_posix(int anchored,Arrow * a1,Arrow * a2)2409 static int compare_exact_posix(int anchored, Arrow *a1, Arrow *a2)
2410 {
2411 char *seq;
2412
2413 assert((a1->rn->type == EXACT) || (a1->rn->type == EXACTF) ||
2414 (a1->rn->type == EXACTFU));
2415 assert((a2->rn->type == POSIXD) || (a2->rn->type == POSIXU) ||
2416 (a2->rn->type == POSIXA));
2417
2418 seq = GET_LITERAL(a1);
2419
2420 if (!_generic_isCC_A(*seq, a2->rn->flags))
2421 {
2422 return compare_mismatch(anchored, a1, a2);
2423 }
2424
2425 return compare_tails(anchored, a1, a2);
2426 }
2427
compare_exactf_posix(int anchored,Arrow * a1,Arrow * a2)2428 static int compare_exactf_posix(int anchored, Arrow *a1, Arrow *a2)
2429 {
2430 char *seq;
2431 char unf[2];
2432 int i;
2433
2434 assert((a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
2435 assert(a2->rn->type == POSIXD);
2436
2437 seq = GET_LITERAL(a1);
2438 init_unfolded(unf, *seq);
2439
2440 for (i = 0; i < 2; ++i)
2441 {
2442 if (!_generic_isCC_A(unf[i], a2->rn->flags))
2443 {
2444 return compare_mismatch(anchored, a1, a2);
2445 }
2446 }
2447
2448 return compare_tails(anchored, a1, a2);
2449 }
2450
compare_exact_negative_posix(int anchored,Arrow * a1,Arrow * a2)2451 static int compare_exact_negative_posix(int anchored, Arrow *a1, Arrow *a2)
2452 {
2453 char *seq;
2454
2455 assert(a1->rn->type == EXACT);
2456 assert((a2->rn->type == NPOSIXD) || (a2->rn->type == NPOSIXU) ||
2457 (a2->rn->type == NPOSIXA));
2458
2459 seq = GET_LITERAL(a1);
2460
2461 if (_generic_isCC_A(*seq, a2->rn->flags))
2462 {
2463 return compare_mismatch(anchored, a1, a2);
2464 }
2465
2466 return compare_tails(anchored, a1, a2);
2467 }
2468
compare_exactf_negative_posix(int anchored,Arrow * a1,Arrow * a2)2469 static int compare_exactf_negative_posix(int anchored, Arrow *a1, Arrow *a2)
2470 {
2471 char *seq;
2472 char unf[2];
2473 int i;
2474
2475 assert((a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
2476 assert((a2->rn->type == NPOSIXD) || (a2->rn->type == NPOSIXU) ||
2477 (a2->rn->type == NPOSIXA));
2478
2479 seq = GET_LITERAL(a1);
2480 init_unfolded(unf, *seq);
2481
2482 for (i = 0; i < 2; ++i)
2483 {
2484 if (_generic_isCC_A(unf[i], a2->rn->flags))
2485 {
2486 return compare_mismatch(anchored, a1, a2);
2487 }
2488 }
2489
2490 return compare_tails(anchored, a1, a2);
2491 }
2492
compare_reg_any_anyof(int anchored,Arrow * a1,Arrow * a2)2493 static int compare_reg_any_anyof(int anchored, Arrow *a1, Arrow *a2)
2494 {
2495 assert(a1->rn->type == REG_ANY);
2496 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
2497
2498 return compare_bitmaps(anchored, a1, a2, ndot.nbitmap, 0);
2499 }
2500
compare_posix_anyof(int anchored,Arrow * a1,Arrow * a2)2501 static int compare_posix_anyof(int anchored, Arrow *a1, Arrow *a2)
2502 {
2503 U32 left_block;
2504 unsigned char *b;
2505
2506 /* fprintf(stderr, "enter compare_posix_anyof\n"); */
2507
2508 assert((a1->rn->type == POSIXD) || (a1->rn->type == POSIXU) ||
2509 (a1->rn->type == POSIXA));
2510 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
2511
2512 if (!convert_class_narrow(a1, &left_block))
2513 {
2514 return compare_mismatch(anchored, a1, a2);
2515 }
2516
2517 /* fprintf(stderr, "right flags = %d\n", a2->rn->flags); */
2518
2519 if (!(a2->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP))
2520 {
2521 U32 right_map;
2522
2523 int cr = convert_map(a2, &right_map);
2524 if (cr == -1)
2525 {
2526 return -1;
2527 }
2528
2529 if (!cr || !(right_map & left_block))
2530 {
2531 return compare_mismatch(anchored, a1, a2);
2532 }
2533 }
2534
2535 /* fprintf(stderr, "left flags = %d\n", a1->rn->flags); */
2536
2537 if (a1->rn->flags >= SIZEOF_ARRAY(posix_regclass_bitmaps))
2538 {
2539 return compare_mismatch(anchored, a1, a2);
2540 }
2541
2542 b = posix_regclass_bitmaps[a1->rn->flags];
2543 if (!b)
2544 {
2545 return compare_mismatch(anchored, a1, a2);
2546 }
2547
2548 return compare_bitmaps(anchored, a1, a2, b, 0);
2549 }
2550
compare_negative_posix_anyof(int anchored,Arrow * a1,Arrow * a2)2551 static int compare_negative_posix_anyof(int anchored, Arrow *a1, Arrow *a2)
2552 {
2553 U32 left_block;
2554 unsigned char *b;
2555
2556 /* fprintf(stderr, "enter compare_negative_posix_anyof\n"); */
2557
2558 assert((a1->rn->type == NPOSIXD) || (a1->rn->type == NPOSIXU) ||
2559 (a1->rn->type == NPOSIXA));
2560 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
2561
2562 if (!convert_class_narrow(a1, &left_block))
2563 {
2564 return compare_mismatch(anchored, a1, a2);
2565 }
2566
2567 /* fprintf(stderr, "right flags = 0x%x\n", a2->rn->flags); */
2568
2569 left_block = EVERY_BLOCK & ~left_block;
2570
2571 /* fprintf(stderr, "left %d -> 0x%x\n", a1->rn->flags, (unsigned)left_block); */
2572
2573 if (!(a2->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP))
2574 {
2575 U32 right_map;
2576
2577 int cr = convert_map(a2, &right_map);
2578 if (cr == -1)
2579 {
2580 return -1;
2581 }
2582
2583 if (a2->rn->flags & ANYOF_INVERT)
2584 {
2585 right_map = EVERY_BLOCK & ~right_map;
2586 }
2587
2588 /* fprintf(stderr, "right map = 0x%x\n", (unsigned)right_map); */
2589
2590 if (!cr || !(right_map & left_block))
2591 {
2592 return compare_mismatch(anchored, a1, a2);
2593 }
2594 }
2595
2596 if (a1->rn->flags >= SIZEOF_ARRAY(posix_regclass_bitmaps))
2597 {
2598 return compare_mismatch(anchored, a1, a2);
2599 }
2600
2601 b = posix_regclass_nbitmaps[a1->rn->flags];
2602 if (!b)
2603 {
2604 return compare_mismatch(anchored, a1, a2);
2605 }
2606
2607 return compare_bitmaps(anchored, a1, a2, b, 0);
2608 }
2609
compare_exact_anyof(int anchored,Arrow * a1,Arrow * a2)2610 static int compare_exact_anyof(int anchored, Arrow *a1, Arrow *a2)
2611 {
2612 BitFlag bf;
2613 char *seq;
2614
2615 /* fprintf(stderr, "enter compare_exact_anyof(%d, \n", anchored); */
2616
2617 assert(a1->rn->type == EXACT);
2618 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
2619
2620 seq = GET_LITERAL(a1);
2621 init_bit_flag(&bf, (unsigned char)(*seq));
2622
2623 if (!(get_bitmap_byte(a2->rn, bf.offs) & bf.mask))
2624 {
2625 return compare_mismatch(anchored, a1, a2);
2626 }
2627
2628 return compare_tails(anchored, a1, a2);
2629 }
2630
compare_exactf_anyof(int anchored,Arrow * a1,Arrow * a2)2631 static int compare_exactf_anyof(int anchored, Arrow *a1, Arrow *a2)
2632 {
2633 BitFlag bf;
2634 char *seq;
2635 char unf[2];
2636 int i;
2637
2638 /* fprintf(stderr, "enter compare_exactf_anyof(%d, \n", anchored); */
2639
2640 assert((a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
2641 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
2642
2643 seq = GET_LITERAL(a1);
2644 init_unfolded(unf, *seq);
2645
2646 for (i = 0; i < 2; ++i)
2647 {
2648 init_bit_flag(&bf, (unsigned char)unf[i]);
2649 if (!(get_bitmap_byte(a2->rn, bf.offs) & bf.mask))
2650 {
2651 return compare_mismatch(anchored, a1, a2);
2652 }
2653 }
2654
2655 return compare_tails(anchored, a1, a2);
2656 }
2657
2658 #ifdef RC_ANYOFR
compare_exact_anyofr(int anchored,Arrow * a1,Arrow * a2)2659 static int compare_exact_anyofr(int anchored, Arrow *a1, Arrow *a2)
2660 {
2661 unsigned char *seq;
2662 unsigned char right[ANYOF_BITMAP_SIZE];
2663 BitFlag bf;
2664
2665 /* fprintf(stderr, "enter compare_exact_anyofr(%d, \n", anchored); */
2666
2667 assert(a1->rn->type == EXACT);
2668 assert(a2->rn->type == ANYOFR);
2669
2670 seq = GET_LITERAL(a1);
2671 init_bit_flag(&bf, *seq);
2672
2673 if (!convert_anyofr_to_bitmap(a2, right))
2674 {
2675 return compare_mismatch(anchored, a1, a2);
2676 }
2677
2678 if (right[bf.offs] & bf.mask)
2679 {
2680 return compare_tails(anchored, a1, a2);
2681 }
2682
2683 return compare_mismatch(anchored, a1, a2);
2684 }
2685
compare_exactf_anyofr(int anchored,Arrow * a1,Arrow * a2)2686 static int compare_exactf_anyofr(int anchored, Arrow *a1, Arrow *a2)
2687 {
2688 char *seq;
2689 int i;
2690 char left[2];
2691 unsigned char right[ANYOF_BITMAP_SIZE];
2692 BitFlag bf;
2693
2694 /* fprintf(stderr, "enter compare_exactf_anyofr(%d, \n", anchored); */
2695
2696 assert((a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
2697 assert(a2->rn->type == ANYOFR);
2698
2699 seq = GET_LITERAL(a1);
2700 init_unfolded(left, *seq);
2701
2702 if (!convert_anyofr_to_bitmap(a2, right))
2703 {
2704 return compare_mismatch(anchored, a1, a2);
2705 }
2706
2707 for (i = 0; i < 2; ++i)
2708 {
2709 init_bit_flag(&bf, left[i]);
2710 if (!(right[bf.offs] & bf.mask))
2711 {
2712 return compare_mismatch(anchored, a1, a2);
2713 }
2714 }
2715
2716 return compare_tails(anchored, a1, a2);
2717 }
2718 #endif
2719
compare_exact_anyofm(int anchored,Arrow * a1,Arrow * a2)2720 static int compare_exact_anyofm(int anchored, Arrow *a1, Arrow *a2)
2721 {
2722 unsigned char *seq;
2723 unsigned char right[ANYOF_BITMAP_SIZE];
2724 BitFlag bf;
2725
2726 /* fprintf(stderr, "enter compare_exact_anyofm(%d, \n", anchored); */
2727
2728 assert(a1->rn->type == EXACT);
2729 assert(a2->rn->type == ANYOFM);
2730
2731 seq = GET_LITERAL(a1);
2732 init_bit_flag(&bf, *seq);
2733
2734 if (!convert_anyofm_to_bitmap(a2, right))
2735 {
2736 return compare_mismatch(anchored, a1, a2);
2737 }
2738
2739 if (right[bf.offs] & bf.mask)
2740 {
2741 return compare_tails(anchored, a1, a2);
2742 }
2743
2744 return compare_mismatch(anchored, a1, a2);
2745 }
2746
compare_exactf_anyofm(int anchored,Arrow * a1,Arrow * a2)2747 static int compare_exactf_anyofm(int anchored, Arrow *a1, Arrow *a2)
2748 {
2749 char *seq;
2750 int i;
2751 char left[2];
2752 unsigned char right[ANYOF_BITMAP_SIZE];
2753 BitFlag bf;
2754
2755 /* fprintf(stderr, "enter compare_exactf_anyofm(%d, \n", anchored); */
2756
2757 assert((a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
2758 assert(a2->rn->type == ANYOFM);
2759
2760 seq = GET_LITERAL(a1);
2761 init_unfolded(left, *seq);
2762
2763 if (!convert_anyofm_to_bitmap(a2, right))
2764 {
2765 return compare_mismatch(anchored, a1, a2);
2766 }
2767
2768 for (i = 0; i < 2; ++i)
2769 {
2770 init_bit_flag(&bf, left[i]);
2771 if (!(right[bf.offs] & bf.mask))
2772 {
2773 return compare_mismatch(anchored, a1, a2);
2774 }
2775 }
2776
2777 return compare_tails(anchored, a1, a2);
2778 }
2779
compare_exact_nanyofm(int anchored,Arrow * a1,Arrow * a2)2780 static int compare_exact_nanyofm(int anchored, Arrow *a1, Arrow *a2)
2781 {
2782 char *seq;
2783 unsigned char right[ANYOF_BITMAP_SIZE];
2784 BitFlag bf;
2785
2786 assert(a1->rn->type == EXACT);
2787 assert(a2->rn->type == NANYOFM);
2788
2789 seq = GET_LITERAL(a1);
2790 init_bit_flag(&bf, *seq);
2791
2792 if (!convert_anyofm_to_bitmap(a2, right))
2793 {
2794 return compare_mismatch(anchored, a1, a2);
2795 }
2796
2797 if (right[bf.offs] & bf.mask)
2798 {
2799 return compare_mismatch(anchored, a1, a2);
2800 }
2801
2802 return compare_tails(anchored, a1, a2);
2803 }
2804
compare_exactf_nanyofm(int anchored,Arrow * a1,Arrow * a2)2805 static int compare_exactf_nanyofm(int anchored, Arrow *a1, Arrow *a2)
2806 {
2807 char *seq;
2808 int i;
2809 char left[2];
2810 unsigned char right[ANYOF_BITMAP_SIZE];
2811 BitFlag bf;
2812
2813 assert((a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
2814 assert(a2->rn->type == NANYOFM);
2815
2816 seq = GET_LITERAL(a1);
2817 init_unfolded(left, *seq);
2818
2819 if (!convert_anyofm_to_bitmap(a2, right))
2820 {
2821 return compare_mismatch(anchored, a1, a2);
2822 }
2823
2824 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
2825 {
2826 right[i] = ~right[i];
2827 }
2828
2829 for (i = 0; i < 2; ++i)
2830 {
2831 init_bit_flag(&bf, left[i]);
2832 if (!(right[bf.offs] & bf.mask))
2833 {
2834 return compare_mismatch(anchored, a1, a2);
2835 }
2836 }
2837
2838 return compare_tails(anchored, a1, a2);
2839 }
2840
compare_posix_nanyofm(int anchored,Arrow * a1,Arrow * a2)2841 static int compare_posix_nanyofm(int anchored, Arrow *a1, Arrow *a2)
2842 {
2843 int i;
2844 unsigned char *b;
2845 unsigned char right[ANYOF_BITMAP_SIZE];
2846
2847 assert((a1->rn->type == POSIXD) || (a1->rn->type == POSIXU) ||
2848 (a1->rn->type == POSIXA));
2849 assert(a2->rn->type == NANYOFM);
2850
2851 if (a1->rn->flags >= SIZEOF_ARRAY(posix_regclass_bitmaps))
2852 {
2853 return compare_mismatch(anchored, a1, a2);
2854 }
2855
2856 b = posix_regclass_bitmaps[a1->rn->flags];
2857 if (!b)
2858 {
2859 return compare_mismatch(anchored, a1, a2);
2860 }
2861
2862 if (!convert_anyofm_to_bitmap(a2, right))
2863 {
2864 return compare_mismatch(anchored, a1, a2);
2865 }
2866
2867 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
2868 {
2869 right[i] = ~right[i];
2870 }
2871
2872 return compare_bitmaps(anchored, a1, a2, b, right);
2873 }
2874
compare_negative_posix_nanyofm(int anchored,Arrow * a1,Arrow * a2)2875 static int compare_negative_posix_nanyofm(int anchored, Arrow *a1, Arrow *a2)
2876 {
2877 int i;
2878 unsigned char *b;
2879 unsigned char right[ANYOF_BITMAP_SIZE];
2880
2881 assert((a1->rn->type == NPOSIXD) || (a1->rn->type == NPOSIXU) ||
2882 (a1->rn->type == NPOSIXA));
2883 assert(a2->rn->type == NANYOFM);
2884
2885 if (a1->rn->flags >= SIZEOF_ARRAY(posix_regclass_bitmaps))
2886 {
2887 return compare_mismatch(anchored, a1, a2);
2888 }
2889
2890 /* positive, because negative bitmaps are compared below */
2891 b = posix_regclass_bitmaps[a1->rn->flags];
2892 if (!b)
2893 {
2894 return compare_mismatch(anchored, a1, a2);
2895 }
2896
2897 if (!convert_anyofm_to_bitmap(a2, right))
2898 {
2899 return compare_mismatch(anchored, a1, a2);
2900 }
2901
2902 return compare_negative_bitmaps(anchored, a1, a2, b, right);
2903 }
2904
compare_exact_lnbreak(int anchored,Arrow * a1,Arrow * a2)2905 static int compare_exact_lnbreak(int anchored, Arrow *a1, Arrow *a2)
2906 {
2907 char *cur;
2908 char *next;
2909
2910 assert((a1->rn->type == EXACT) || (a1->rn->type == EXACTF) ||
2911 (a1->rn->type == EXACTFU));
2912 assert(a2->rn->type == LNBREAK);
2913
2914 cur = GET_LITERAL(a1);
2915
2916 /* first, check 2-character newline */
2917 if ((*cur == '\r') && ((a1->spent + 1) < a1->rn->flags))
2918 {
2919 /* we're ignoring the possibility the \n is in a different
2920 node, but that probably doesn't happen */
2921 next = (((char *)(a1->rn + 1)) + a1->spent + 1);
2922 if (*next == '\n')
2923 {
2924 ++(a1->spent);
2925 return compare_tails(anchored, a1, a2);
2926 }
2927 }
2928
2929 /* otherwise, check vertical space */
2930 if (!_generic_isCC_A(*cur, _CC_VERTSPACE))
2931 {
2932 return compare_mismatch(anchored, a1, a2);
2933 }
2934
2935 return compare_tails(anchored, a1, a2);
2936 }
2937
compare_exact_byte_class(int anchored,Arrow * a1,Arrow * a2,char * lookup)2938 static int compare_exact_byte_class(int anchored, Arrow *a1, Arrow *a2,
2939 char *lookup)
2940 {
2941 char *seq;
2942
2943 assert((a1->rn->type == EXACT) || (a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
2944
2945 seq = GET_LITERAL(a1);
2946
2947 if (!lookup[(unsigned char)(*seq)])
2948 {
2949 return compare_mismatch(anchored, a1, a2);
2950 }
2951
2952 return compare_tails(anchored, a1, a2);
2953 }
2954
compare_exact_multiline(int anchored,Arrow * a1,Arrow * a2)2955 static int compare_exact_multiline(int anchored, Arrow *a1, Arrow *a2)
2956 {
2957 assert((a1->rn->type == EXACT) || (a1->rn->type == EXACTF) ||
2958 (a1->rn->type == EXACTFU));
2959 assert((a2->rn->type == MBOL) || (a2->rn->type == MEOL));
2960
2961 return compare_exact_byte_class(anchored, a1, a2,
2962 ndot.lookup);
2963 }
2964
compare_sany_anyof(int anchored,Arrow * a1,Arrow * a2)2965 static int compare_sany_anyof(int anchored, Arrow *a1, Arrow *a2)
2966 {
2967 /* fprintf(stderr, "enter compare_sany_anyof\n"); */
2968
2969 assert(a1->rn->type == SANY);
2970 assert((a2->rn->type == ANYOF) || (a2->rn->type == ANYOFD));
2971
2972 /* fprintf(stderr, "left flags = 0x%x, right flags = 0x%x\n",
2973 a1->rn->flags, a2->rn->flags); */
2974
2975 if (a2->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
2976 {
2977 return compare_right_full(anchored, a1, a2);
2978 }
2979
2980 return compare_mismatch(anchored, a1, a2);
2981 }
2982
compare_anyof_reg_any(int anchored,Arrow * a1,Arrow * a2)2983 static int compare_anyof_reg_any(int anchored, Arrow *a1, Arrow *a2)
2984 {
2985 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
2986 assert(a2->rn->type == REG_ANY);
2987
2988 return compare_bitmaps(anchored, a1, a2, 0, ndot.nbitmap);
2989 }
2990
2991 #ifdef RC_ANYOFR
compare_anyofr_reg_any(int anchored,Arrow * a1,Arrow * a2)2992 static int compare_anyofr_reg_any(int anchored, Arrow *a1, Arrow *a2)
2993 {
2994 unsigned char left[ANYOF_BITMAP_SIZE];
2995
2996 assert(a1->rn->type == ANYOFR);
2997 assert(a2->rn->type == REG_ANY);
2998
2999 if (!convert_anyofr_to_bitmap(a1, left))
3000 {
3001 return compare_mismatch(anchored, a1, a2);
3002 }
3003
3004 return compare_bitmaps(anchored, a1, a2, left, ndot.nbitmap);
3005 }
3006 #endif
3007
compare_anyofm_reg_any(int anchored,Arrow * a1,Arrow * a2)3008 static int compare_anyofm_reg_any(int anchored, Arrow *a1, Arrow *a2)
3009 {
3010 unsigned char left[ANYOF_BITMAP_SIZE];
3011
3012 assert(a1->rn->type == ANYOFM);
3013 assert(a2->rn->type == REG_ANY);
3014
3015 if (!convert_anyofm_to_bitmap(a1, left))
3016 {
3017 return compare_mismatch(anchored, a1, a2);
3018 }
3019
3020 return compare_bitmaps(anchored, a1, a2, left, ndot.nbitmap);
3021 }
3022
compare_nanyofm_reg_any(int anchored,Arrow * a1,Arrow * a2)3023 static int compare_nanyofm_reg_any(int anchored, Arrow *a1, Arrow *a2)
3024 {
3025 unsigned char left[ANYOF_BITMAP_SIZE];
3026
3027 assert(a1->rn->type == NANYOFM);
3028 assert(a2->rn->type == REG_ANY);
3029
3030 if (!convert_anyofm_to_bitmap(a1, left))
3031 {
3032 return compare_mismatch(anchored, a1, a2);
3033 }
3034
3035 return compare_negative_bitmaps(anchored, a1, a2, left, ndot.bitmap);
3036 }
3037
compare_anyof_lnbreak(int anchored,Arrow * a1,Arrow * a2)3038 static int compare_anyof_lnbreak(int anchored, Arrow *a1, Arrow *a2)
3039 {
3040 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
3041 assert(a2->rn->type == LNBREAK);
3042
3043 return compare_bitmaps(anchored, a1, a2, 0, vertical_whitespace.bitmap);
3044 }
3045
compare_exact_reg_any(int anchored,Arrow * a1,Arrow * a2)3046 static int compare_exact_reg_any(int anchored, Arrow *a1, Arrow *a2)
3047 {
3048 assert((a1->rn->type == EXACT) || (a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
3049 assert(a2->rn->type == REG_ANY);
3050
3051 return compare_exact_byte_class(anchored, a1, a2, ndot.nlookup);
3052 }
3053
compare_anyof_posix(int anchored,Arrow * a1,Arrow * a2)3054 static int compare_anyof_posix(int anchored, Arrow *a1, Arrow *a2)
3055 {
3056 unsigned char *b;
3057
3058 /* fprintf(stderr, "enter compare_anyof_posix\n"); */
3059
3060 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
3061 assert((a2->rn->type == POSIXD) || (a2->rn->type == POSIXU) || (a2->rn->type == POSIXA));
3062
3063 if (a2->rn->flags >= SIZEOF_ARRAY(posix_regclass_bitmaps))
3064 {
3065 /* fprintf(stderr, "flags = %d\n", a2->rn->flags); */
3066 return compare_mismatch(anchored, a1, a2);
3067 }
3068
3069 b = posix_regclass_bitmaps[a2->rn->flags];
3070 if (!b)
3071 {
3072 /* fprintf(stderr, "no bitmap for flags = %d\n", a2->rn->flags); */
3073 return compare_mismatch(anchored, a1, a2);
3074 }
3075
3076 return compare_bitmaps(anchored, a1, a2, 0, b);
3077 }
3078
3079 #ifdef RC_ANYOFR
compare_anyofr_posix(int anchored,Arrow * a1,Arrow * a2)3080 static int compare_anyofr_posix(int anchored, Arrow *a1, Arrow *a2)
3081 {
3082 unsigned char *b;
3083 unsigned char left[ANYOF_BITMAP_SIZE];
3084
3085 /* fprintf(stderr, "enter compare_anyofr_posix\n"); */
3086
3087 assert(a1->rn->type == ANYOFR);
3088 assert((a2->rn->type == POSIXD) || (a2->rn->type == POSIXU) || (a2->rn->type == POSIXA));
3089
3090 if (!convert_anyofr_to_bitmap(a1, left))
3091 {
3092 return compare_mismatch(anchored, a1, a2);
3093 }
3094
3095 b = posix_regclass_bitmaps[a2->rn->flags];
3096 if (!b)
3097 {
3098 return compare_mismatch(anchored, a1, a2);
3099 }
3100
3101 return compare_bitmaps(anchored, a1, a2, left, b);
3102 }
3103 #endif
3104
compare_anyofm_posix(int anchored,Arrow * a1,Arrow * a2)3105 static int compare_anyofm_posix(int anchored, Arrow *a1, Arrow *a2)
3106 {
3107 unsigned char *b;
3108 unsigned char left[ANYOF_BITMAP_SIZE];
3109
3110 /* fprintf(stderr, "enter compare_anyofm_posix\n"); */
3111
3112 assert(a1->rn->type == ANYOFM);
3113 assert((a2->rn->type == POSIXD) || (a2->rn->type == POSIXU) || (a2->rn->type == POSIXA));
3114
3115 if (!convert_anyofm_to_bitmap(a1, left))
3116 {
3117 return compare_mismatch(anchored, a1, a2);
3118 }
3119
3120 b = posix_regclass_bitmaps[a2->rn->flags];
3121 if (!b)
3122 {
3123 return compare_mismatch(anchored, a1, a2);
3124 }
3125
3126 return compare_bitmaps(anchored, a1, a2, left, b);
3127 }
3128
compare_nanyofm_posix(int anchored,Arrow * a1,Arrow * a2)3129 static int compare_nanyofm_posix(int anchored, Arrow *a1, Arrow *a2)
3130 {
3131 unsigned char *b;
3132 unsigned char left[ANYOF_BITMAP_SIZE];
3133
3134 assert(a1->rn->type == NANYOFM);
3135 assert((a2->rn->type == POSIXD) || (a2->rn->type == POSIXU) || (a2->rn->type == POSIXA));
3136
3137 if (!convert_anyofm_to_bitmap(a1, left))
3138 {
3139 return compare_mismatch(anchored, a1, a2);
3140 }
3141
3142 b = posix_regclass_nbitmaps[a2->rn->flags];
3143 if (!b)
3144 {
3145 return compare_mismatch(anchored, a1, a2);
3146 }
3147
3148 return compare_negative_bitmaps(anchored, a1, a2, left, b);
3149 }
3150
compare_anyof_posixa(int anchored,Arrow * a1,Arrow * a2)3151 static int compare_anyof_posixa(int anchored, Arrow *a1, Arrow *a2)
3152 {
3153 unsigned char *b;
3154
3155 /* fprintf(stderr, "enter compare_anyof_posixa\n"); */
3156
3157 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
3158 assert(a2->rn->type == POSIXA);
3159
3160 if (ANYOF_FLAGS(a1->rn) & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)
3161 {
3162 return compare_mismatch(anchored, a1, a2);
3163 }
3164
3165 if (a2->rn->flags >= SIZEOF_ARRAY(posix_regclass_bitmaps))
3166 {
3167 /* fprintf(stderr, "flags = %d\n", a2->rn->flags); */
3168 return compare_mismatch(anchored, a1, a2);
3169 }
3170
3171 b = posix_regclass_bitmaps[a2->rn->flags];
3172 if (!b)
3173 {
3174 /* fprintf(stderr, "no bitmap for flags = %d\n", a2->rn->flags); */
3175 return compare_mismatch(anchored, a1, a2);
3176 }
3177
3178 return compare_bitmaps(anchored, a1, a2, 0, b);
3179 }
3180
compare_anyof_negative_posix(int anchored,Arrow * a1,Arrow * a2)3181 static int compare_anyof_negative_posix(int anchored, Arrow *a1, Arrow *a2)
3182 {
3183 unsigned char *b;
3184
3185 /* fprintf(stderr, "enter compare_anyof_negative_posix\n"); */
3186
3187 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
3188 assert((a2->rn->type == NPOSIXD) || (a2->rn->type == NPOSIXU) ||
3189 (a2->rn->type == NPOSIXA));
3190
3191 if (a2->rn->flags >= SIZEOF_ARRAY(posix_regclass_nbitmaps))
3192 {
3193 /* fprintf(stderr, "flags = %d\n", a2->rn->flags); */
3194 return compare_mismatch(anchored, a1, a2);
3195 }
3196
3197 b = posix_regclass_nbitmaps[a2->rn->flags];
3198 if (!b)
3199 {
3200 /* fprintf(stderr, "no negative bitmap for flags = %d\n", a2->rn->flags); */
3201 return compare_mismatch(anchored, a1, a2);
3202 }
3203
3204 return compare_bitmaps(anchored, a1, a2, 0, b);
3205 }
3206
3207 #ifdef RC_ANYOFR
compare_anyofr_negative_posix(int anchored,Arrow * a1,Arrow * a2)3208 static int compare_anyofr_negative_posix(int anchored, Arrow *a1, Arrow *a2)
3209 {
3210 unsigned char *posix_bitmap;
3211 unsigned char anyof_bitmap[ANYOF_BITMAP_SIZE];
3212
3213 /* fprintf(stderr, "enter compare_anyofr_negative_posix\n"); */
3214
3215 assert(a1->rn->type == ANYOFR);
3216 assert((a2->rn->type == NPOSIXD) || (a2->rn->type == NPOSIXU) ||
3217 (a2->rn->type == NPOSIXA));
3218
3219 if (!convert_anyofr_to_bitmap(a1, anyof_bitmap))
3220 {
3221 return compare_mismatch(anchored, a1, a2);
3222 }
3223
3224 if (a2->rn->flags >= SIZEOF_ARRAY(posix_regclass_nbitmaps))
3225 {
3226 /* fprintf(stderr, "flags = %d\n", a2->rn->flags); */
3227 return compare_mismatch(anchored, a1, a2);
3228 }
3229
3230 posix_bitmap = posix_regclass_nbitmaps[a2->rn->flags];
3231 if (!posix_bitmap)
3232 {
3233 /* fprintf(stderr, "no negative bitmap for flags = %d\n", a2->rn->flags); */
3234 return compare_mismatch(anchored, a1, a2);
3235 }
3236
3237 return compare_bitmaps(anchored, a1, a2, anyof_bitmap, posix_bitmap);
3238 }
3239 #endif
3240
compare_anyofm_negative_posix(int anchored,Arrow * a1,Arrow * a2)3241 static int compare_anyofm_negative_posix(int anchored, Arrow *a1, Arrow *a2)
3242 {
3243 unsigned char *posix_bitmap;
3244 unsigned char anyof_bitmap[ANYOF_BITMAP_SIZE];
3245
3246 /* fprintf(stderr, "enter compare_anyofm_negative_posix\n"); */
3247
3248 assert(a1->rn->type == ANYOFM);
3249 assert((a2->rn->type == NPOSIXD) || (a2->rn->type == NPOSIXU) ||
3250 (a2->rn->type == NPOSIXA));
3251
3252 if (!convert_anyofm_to_bitmap(a1, anyof_bitmap))
3253 {
3254 return compare_mismatch(anchored, a1, a2);
3255 }
3256
3257 if (a2->rn->flags >= SIZEOF_ARRAY(posix_regclass_nbitmaps))
3258 {
3259 /* fprintf(stderr, "flags = %d\n", a2->rn->flags); */
3260 return compare_mismatch(anchored, a1, a2);
3261 }
3262
3263 posix_bitmap = posix_regclass_nbitmaps[a2->rn->flags];
3264 if (!posix_bitmap)
3265 {
3266 /* fprintf(stderr, "no negative bitmap for flags = %d\n", a2->rn->flags); */
3267 return compare_mismatch(anchored, a1, a2);
3268 }
3269
3270 return compare_bitmaps(anchored, a1, a2, anyof_bitmap, posix_bitmap);
3271 }
3272
compare_nanyofm_negative_posix(int anchored,Arrow * a1,Arrow * a2)3273 static int compare_nanyofm_negative_posix(int anchored, Arrow *a1, Arrow *a2)
3274 {
3275 unsigned char *posix_bitmap;
3276 unsigned char anyof_bitmap[ANYOF_BITMAP_SIZE];
3277
3278 assert(a1->rn->type == NANYOFM);
3279 assert((a2->rn->type == NPOSIXD) || (a2->rn->type == NPOSIXU) ||
3280 (a2->rn->type == NPOSIXA));
3281
3282 if (!convert_anyofm_to_bitmap(a1, anyof_bitmap))
3283 {
3284 return compare_mismatch(anchored, a1, a2);
3285 }
3286
3287 if (a2->rn->flags >= SIZEOF_ARRAY(posix_regclass_bitmaps))
3288 {
3289 return compare_mismatch(anchored, a1, a2);
3290 }
3291
3292 posix_bitmap = posix_regclass_bitmaps[a2->rn->flags];
3293 if (!posix_bitmap)
3294 {
3295 return compare_mismatch(anchored, a1, a2);
3296 }
3297
3298 return compare_negative_bitmaps(anchored, a1, a2, anyof_bitmap, posix_bitmap);
3299 }
3300
compare_posix_reg_any(int anchored,Arrow * a1,Arrow * a2)3301 static int compare_posix_reg_any(int anchored, Arrow *a1, Arrow *a2)
3302 {
3303 assert((a1->rn->type == POSIXD) || (a1->rn->type == POSIXU) ||
3304 (a1->rn->type == POSIXA));
3305 assert(a2->rn->type == REG_ANY);
3306
3307 U8 flags = a1->rn->flags;
3308 if (flags >= SIZEOF_ARRAY(newline_posix_regclasses))
3309 {
3310 /* fprintf(stderr, "unknown POSIX character class %d\n", flags); */
3311 rc_error = "unknown POSIX character class";
3312 return -1;
3313 }
3314
3315 if (newline_posix_regclasses[flags])
3316 {
3317 return compare_mismatch(anchored, a1, a2);
3318 }
3319
3320 return compare_tails(anchored, a1, a2);
3321 }
3322
compare_negative_posix_reg_any(int anchored,Arrow * a1,Arrow * a2)3323 static int compare_negative_posix_reg_any(int anchored, Arrow *a1, Arrow *a2)
3324 {
3325 assert((a1->rn->type == NPOSIXD) || (a1->rn->type == NPOSIXU) ||
3326 (a1->rn->type == NPOSIXA));
3327 assert(a2->rn->type == REG_ANY);
3328
3329 U8 flags = a1->rn->flags;
3330 if (flags >= SIZEOF_ARRAY(newline_posix_regclasses))
3331 {
3332 rc_error = "unknown negative POSIX character class";
3333 return -1;
3334 }
3335
3336 if (!newline_posix_regclasses[flags])
3337 {
3338 return compare_mismatch(anchored, a1, a2);
3339 }
3340
3341 return compare_tails(anchored, a1, a2);
3342 }
3343
compare_posix_lnbreak(int anchored,Arrow * a1,Arrow * a2)3344 static int compare_posix_lnbreak(int anchored, Arrow *a1, Arrow *a2)
3345 {
3346 assert((a1->rn->type == POSIXD) || (a1->rn->type == POSIXU) ||
3347 (a1->rn->type == POSIXA));
3348 assert(a2->rn->type == LNBREAK);
3349
3350 if (a1->rn->flags != _CC_VERTSPACE)
3351 {
3352 return compare_mismatch(anchored, a1, a2);
3353 }
3354
3355 return compare_tails(anchored, a1, a2);
3356 }
3357
compare_anyof_exact(int anchored,Arrow * a1,Arrow * a2)3358 static int compare_anyof_exact(int anchored, Arrow *a1, Arrow *a2)
3359 {
3360 BitFlag bf;
3361 char *seq;
3362 int i;
3363 unsigned char req;
3364
3365 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
3366 assert(a2->rn->type == EXACT);
3367
3368 if (a1->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
3369 {
3370 return compare_mismatch(anchored, a1, a2);
3371 }
3372
3373 seq = GET_LITERAL(a2);
3374 init_bit_flag(&bf, *((unsigned char *)seq));
3375
3376 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
3377 {
3378 req = (i != bf.offs) ? 0 : bf.mask;
3379 if (get_bitmap_byte(a1->rn, i) != req)
3380 {
3381 return compare_mismatch(anchored, a1, a2);
3382 }
3383 }
3384
3385 return compare_tails(anchored, a1, a2);
3386 }
3387
3388 #ifdef RC_ANYOFR
compare_anyofr_exact(int anchored,Arrow * a1,Arrow * a2)3389 static int compare_anyofr_exact(int anchored, Arrow *a1, Arrow *a2)
3390 {
3391 unsigned char left[ANYOF_BITMAP_SIZE];
3392 BitFlag bf;
3393 char *seq;
3394 int i;
3395 unsigned char req;
3396
3397 assert(a1->rn->type == ANYOFR);
3398 assert(a2->rn->type == EXACT);
3399
3400 if (!convert_anyofr_to_bitmap(a1, left))
3401 {
3402 return compare_mismatch(anchored, a1, a2);
3403 }
3404
3405 seq = GET_LITERAL(a2);
3406 init_bit_flag(&bf, *((unsigned char *)seq));
3407
3408 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
3409 {
3410 req = (i != bf.offs) ? 0 : bf.mask;
3411 if (left[i] != req)
3412 {
3413 return compare_mismatch(anchored, a1, a2);
3414 }
3415 }
3416
3417 return compare_tails(anchored, a1, a2);
3418 }
3419 #endif
3420
compare_anyofm_exact(int anchored,Arrow * a1,Arrow * a2)3421 static int compare_anyofm_exact(int anchored, Arrow *a1, Arrow *a2)
3422 {
3423 unsigned char left[ANYOF_BITMAP_SIZE];
3424 BitFlag bf;
3425 char *seq;
3426 int i;
3427 unsigned char req;
3428
3429 assert(a1->rn->type == ANYOFM);
3430 assert(a2->rn->type == EXACT);
3431
3432 if (!convert_anyofm_to_bitmap(a1, left))
3433 {
3434 return compare_mismatch(anchored, a1, a2);
3435 }
3436
3437 seq = GET_LITERAL(a2);
3438 init_bit_flag(&bf, *((unsigned char *)seq));
3439
3440 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
3441 {
3442 req = (i != bf.offs) ? 0 : bf.mask;
3443 if (left[i] != req)
3444 {
3445 return compare_mismatch(anchored, a1, a2);
3446 }
3447 }
3448
3449 return compare_tails(anchored, a1, a2);
3450 }
3451
compare_anyof_exactf(int anchored,Arrow * a1,Arrow * a2)3452 static int compare_anyof_exactf(int anchored, Arrow *a1, Arrow *a2)
3453 {
3454 char *seq;
3455 char unf[2];
3456 BitFlag bf[2];
3457 unsigned char right[ANYOF_BITMAP_SIZE];
3458 int i;
3459
3460 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
3461 assert((a2->rn->type == EXACTF) || (a2->rn->type == EXACTFU));
3462
3463 if (a1->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
3464 {
3465 return compare_mismatch(anchored, a1, a2);
3466 }
3467
3468 seq = GET_LITERAL(a2);
3469 init_unfolded(unf, *seq);
3470
3471 for (i = 0; i < 2; ++i)
3472 {
3473 init_bit_flag(bf + i, (unsigned char)(unf[i]));
3474 }
3475
3476 if (bf[0].offs == bf[1].offs)
3477 {
3478 bf[0].mask = bf[1].mask = bf[0].mask | bf[1].mask;
3479 }
3480
3481 memset(right, 0, ANYOF_BITMAP_SIZE);
3482 for (i = 0; i < 2; ++i)
3483 {
3484 right[bf[i].offs] = bf[i].mask;
3485 }
3486
3487 return compare_bitmaps(anchored, a1, a2, 0, right);
3488 }
3489
3490 #ifdef RC_ANYOFR
compare_anyofr_exactf(int anchored,Arrow * a1,Arrow * a2)3491 static int compare_anyofr_exactf(int anchored, Arrow *a1, Arrow *a2)
3492 {
3493 char *seq;
3494 int i;
3495 BitFlag bf;
3496 unsigned char left[ANYOF_BITMAP_SIZE];
3497 unsigned char right[ANYOF_BITMAP_SIZE];
3498 char unf[2];
3499
3500 /* fprintf(stderr, "enter compare_anyofr_exactf(%d, \n", anchored); */
3501
3502 assert(a1->rn->type == ANYOFR);
3503 assert((a2->rn->type == EXACTF) || (a2->rn->type == EXACTFU));
3504
3505 if (!convert_anyofr_to_bitmap(a1, left))
3506 {
3507 return compare_mismatch(anchored, a1, a2);
3508 }
3509
3510 seq = GET_LITERAL(a2);
3511 init_unfolded(unf, *seq);
3512
3513 memset(right, 0, ANYOF_BITMAP_SIZE);
3514 for (i = 0; i < 2; ++i)
3515 {
3516 init_bit_flag(&bf, unf[i]);
3517 right[bf.offs] = bf.mask;
3518 }
3519
3520 return compare_bitmaps(anchored, a1, a2, left, right);
3521 }
3522 #endif
3523
compare_anyofm_exactf(int anchored,Arrow * a1,Arrow * a2)3524 static int compare_anyofm_exactf(int anchored, Arrow *a1, Arrow *a2)
3525 {
3526 char *seq;
3527 int i;
3528 BitFlag bf;
3529 unsigned char left[ANYOF_BITMAP_SIZE];
3530 unsigned char right[ANYOF_BITMAP_SIZE];
3531 char unf[2];
3532
3533 /* fprintf(stderr, "enter compare_anyofm_exactf(%d, \n", anchored); */
3534
3535 assert(a1->rn->type == ANYOFM);
3536 assert((a2->rn->type == EXACTF) || (a2->rn->type == EXACTFU));
3537
3538 if (!convert_anyofm_to_bitmap(a1, left))
3539 {
3540 return compare_mismatch(anchored, a1, a2);
3541 }
3542
3543 seq = GET_LITERAL(a2);
3544 init_unfolded(unf, *seq);
3545
3546 memset(right, 0, ANYOF_BITMAP_SIZE);
3547 for (i = 0; i < 2; ++i)
3548 {
3549 init_bit_flag(&bf, unf[i]);
3550 right[bf.offs] = bf.mask;
3551 }
3552
3553 return compare_bitmaps(anchored, a1, a2, left, right);
3554 }
3555
compare_exact_exact(int anchored,Arrow * a1,Arrow * a2)3556 static int compare_exact_exact(int anchored, Arrow *a1, Arrow *a2)
3557 {
3558 char *q1, *q2;
3559
3560 #if !defined(RC_EXACT_ONLY8) && !defined(RC_EXACT_REQ8)
3561 assert(a1->rn->type == EXACT);
3562 assert(a2->rn->type == EXACT);
3563 #endif
3564
3565 q1 = GET_LITERAL(a1);
3566 q2 = GET_LITERAL(a2);
3567
3568 /* fprintf(stderr, "enter compare_exact_exact(%d, '%c', '%c')\n", anchored,
3569 *q1, *q2); */
3570
3571 if (*q1 != *q2)
3572 {
3573 return compare_mismatch(anchored, a1, a2);
3574 }
3575
3576 return compare_tails(anchored, a1, a2);
3577 }
3578
compare_exact_exactf(int anchored,Arrow * a1,Arrow * a2)3579 static int compare_exact_exactf(int anchored, Arrow *a1, Arrow *a2)
3580 {
3581 char *q1, *q2;
3582 char unf[2];
3583
3584 assert(a1->rn->type == EXACT);
3585 assert((a2->rn->type == EXACTF) || (a2->rn->type == EXACTFU));
3586
3587 q1 = GET_LITERAL(a1);
3588 q2 = GET_LITERAL(a2);
3589 init_unfolded(unf, *q2);
3590
3591 if ((*q1 != unf[0]) && (*q1 != unf[1]))
3592 {
3593 return compare_mismatch(anchored, a1, a2);
3594 }
3595
3596 return compare_tails(anchored, a1, a2);
3597 }
3598
compare_exactf_exact(int anchored,Arrow * a1,Arrow * a2)3599 static int compare_exactf_exact(int anchored, Arrow *a1, Arrow *a2)
3600 {
3601 char *q1, *q2;
3602 char unf[2];
3603
3604 assert((a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
3605 assert(a2->rn->type == EXACT);
3606
3607 q1 = GET_LITERAL(a1);
3608 init_unfolded(unf, *q1);
3609 q2 = GET_LITERAL(a2);
3610
3611 if ((unf[0] != *q2) || (unf[1] != *q2))
3612 {
3613 return compare_mismatch(anchored, a1, a2);
3614 }
3615
3616 return compare_tails(anchored, a1, a2);
3617 }
3618
compare_exactf_exactf(int anchored,Arrow * a1,Arrow * a2)3619 static int compare_exactf_exactf(int anchored, Arrow *a1, Arrow *a2)
3620 {
3621 char *q1, *q2;
3622 char l1, l2;
3623
3624 assert((a1->rn->type == EXACTF) || (a1->rn->type == EXACTFU));
3625 assert((a2->rn->type == EXACTF) || (a2->rn->type == EXACTFU));
3626
3627 q1 = GET_LITERAL(a1);
3628 q2 = GET_LITERAL(a2);
3629
3630 l1 = TOLOWER(*q1);
3631 l2 = TOLOWER(*q2);
3632
3633 if (l1 != l2)
3634 {
3635 return compare_mismatch(anchored, a1, a2);
3636 }
3637
3638 return compare_tails(anchored, a1, a2);
3639 }
3640
compare_left_branch(int anchored,Arrow * a1,Arrow * a2)3641 static int compare_left_branch(int anchored, Arrow *a1, Arrow *a2)
3642 {
3643 int rv, tsz;
3644 regnode *p1;
3645 Arrow left, right;
3646
3647 /* fprintf(stderr, "enter compare_left_branch\n"); */
3648
3649 assert(a1->rn->type == BRANCH);
3650
3651 /* origins stay the same throughout the cycle */
3652 left.origin = a1->origin;
3653 right.origin = a2->origin;
3654 p1 = a1->rn;
3655 while (p1->type == BRANCH)
3656 {
3657 if (p1->next_off == 0)
3658 {
3659 rc_error = "Branch with zero offset";
3660 return -1;
3661 }
3662
3663 left.rn = p1 + 1;
3664 left.spent = 0;
3665
3666 right.rn = a2->rn;
3667 right.spent = a2->spent;
3668
3669 rv = compare(anchored, &left, &right);
3670 /* fprintf(stderr, "rv = %d\n", rv); */
3671
3672 if (rv < 0)
3673 {
3674 return rv;
3675 }
3676
3677 if (!rv)
3678 {
3679 /* fprintf(stderr, "compare_left_branch doesn't match\n"); */
3680 return compare_mismatch(anchored, a1, a2);
3681 }
3682
3683 p1 += p1->next_off;
3684 }
3685
3686 a1->rn = p1;
3687 a1->spent = 0;
3688
3689 tsz = get_size(a2->rn);
3690 if (tsz <= 0)
3691 {
3692 return -1;
3693 }
3694
3695 a2->rn += tsz - 1;
3696 a2->spent = 0;
3697
3698 return 1;
3699 }
3700
compare_set(int anchored,Arrow * a1,Arrow * a2,unsigned char * b1)3701 static int compare_set(int anchored, Arrow *a1, Arrow *a2, unsigned char *b1)
3702 {
3703 regnode *alt, *t1;
3704 Arrow left, right;
3705 int i, j, power, rv, sz, offs;
3706 unsigned char loc;
3707
3708 offs = GET_OFFSET(a1->rn);
3709 if (offs <= 0)
3710 {
3711 return -1;
3712 }
3713
3714 t1 = a1->rn + offs;
3715 sz = get_size(t1);
3716 if (sz < 0)
3717 {
3718 return sz;
3719 }
3720
3721 alt = (regnode *)malloc(sizeof(regnode) * (2 + sz));
3722 if (!alt)
3723 {
3724 rc_error = "Couldn't allocate memory for alternative copy";
3725 return -1;
3726 }
3727
3728 alt[0].flags = 1;
3729 alt[0].type = EXACT;
3730 alt[0].next_off = 2;
3731 memcpy(alt + 2, t1, sizeof(regnode) * sz);
3732
3733 left.origin = a1->origin;
3734 right.origin = a2->origin;
3735 right.rn = 0;
3736
3737 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
3738 {
3739 loc = b1 ? b1[i] : get_bitmap_byte(a1->rn, i);
3740 if ((i >= 16) && loc)
3741 {
3742 free(alt);
3743 return compare_mismatch(anchored, a1, a2);
3744 }
3745
3746 power = 1;
3747 for (j = 0; j < 8; ++j)
3748 {
3749 if (loc & power)
3750 {
3751 alt[1].flags = 8 * i + j;
3752 left.rn = alt;
3753 left.spent = 0;
3754
3755 right.rn = a2->rn;
3756 right.spent = a2->spent;
3757
3758 rv = compare_right_branch(anchored, &left, &right);
3759 if (rv < 0)
3760 {
3761 free(alt);
3762 return rv;
3763 }
3764
3765 if (!rv)
3766 {
3767 free(alt);
3768 return compare_mismatch(anchored, a1, a2);
3769 }
3770 }
3771
3772 power *= 2;
3773 }
3774 }
3775
3776 free(alt);
3777
3778 if (!right.rn)
3779 {
3780 rc_error = "Empty mask not supported";
3781 return -1;
3782 }
3783
3784 a1->rn = t1 + sz - 1;
3785 assert(a1->rn->type == END);
3786 a1->spent = 0;
3787
3788 a2->rn = right.rn;
3789 a2->spent = right.spent;
3790
3791 return 1;
3792 }
3793
compare_anyof_branch(int anchored,Arrow * a1,Arrow * a2)3794 static int compare_anyof_branch(int anchored, Arrow *a1, Arrow *a2)
3795 {
3796 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
3797 assert(a2->rn->type == BRANCH);
3798
3799 return compare_set(anchored, a1, a2, 0);
3800 }
3801
3802 #ifdef RC_ANYOFR
compare_anyofr_branch(int anchored,Arrow * a1,Arrow * a2)3803 static int compare_anyofr_branch(int anchored, Arrow *a1, Arrow *a2)
3804 {
3805 unsigned char left[ANYOF_BITMAP_SIZE];
3806
3807 assert(a1->rn->type == ANYOFR);
3808 assert(a2->rn->type == BRANCH);
3809
3810 if (!convert_anyofr_to_bitmap(a1, left))
3811 {
3812 return compare_mismatch(anchored, a1, a2);
3813 }
3814
3815 return compare_set(anchored, a1, a2, left);
3816 }
3817 #endif
3818
compare_anyofm_branch(int anchored,Arrow * a1,Arrow * a2)3819 static int compare_anyofm_branch(int anchored, Arrow *a1, Arrow *a2)
3820 {
3821 unsigned char left[ANYOF_BITMAP_SIZE];
3822
3823 assert(a1->rn->type == ANYOFM);
3824 assert(a2->rn->type == BRANCH);
3825
3826 if (!convert_anyofm_to_bitmap(a1, left))
3827 {
3828 return compare_mismatch(anchored, a1, a2);
3829 }
3830
3831 return compare_set(anchored, a1, a2, left);
3832 }
3833
compare_right_branch(int anchored,Arrow * a1,Arrow * a2)3834 static int compare_right_branch(int anchored, Arrow *a1, Arrow *a2)
3835 {
3836 int rv;
3837 regnode *p2;
3838 Arrow left, right;
3839
3840 /* fprintf(stderr, "enter compare_right_branch\n"); */
3841
3842 assert(a2->rn->type == BRANCH);
3843
3844 /* origins stay the same throughout the cycle */
3845 left.origin = a1->origin;
3846 right.origin = a2->origin;
3847 p2 = a2->rn;
3848 rv = 0;
3849 while ((p2->type == BRANCH) && !rv)
3850 {
3851 /* fprintf(stderr, "p2->type = %d\n", p2->type); */
3852
3853 left.rn = a1->rn;
3854 left.spent = a1->spent;
3855
3856 if (p2->next_off == 0)
3857 {
3858 rc_error = "Branch with offset zero";
3859 return -1;
3860 }
3861
3862 right.rn = p2 + 1;
3863 right.spent = 0;
3864
3865 rv = compare(anchored, &left, &right);
3866 /* fprintf(stderr, "got %d\n", rv); */
3867
3868 p2 += p2->next_off;
3869 }
3870
3871 if (rv < 0)
3872 {
3873 return rv;
3874 }
3875
3876 if (!rv)
3877 {
3878 return compare_mismatch(anchored, a1, a2);
3879 }
3880
3881 a1->rn = left.rn;
3882 a1->spent = left.spent;
3883
3884 a2->rn = right.rn;
3885 a2->spent = right.spent;
3886
3887 return 1;
3888 }
3889
compare_right_star(int anchored,Arrow * a1,Arrow * a2)3890 static int compare_right_star(int anchored, Arrow *a1, Arrow *a2)
3891 {
3892 regnode *p2;
3893 Arrow left, right;
3894 int sz, rv, offs;
3895
3896 /* fprintf(stderr, "enter compare_right_star\n"); */
3897
3898 p2 = a2->rn;
3899 assert(p2->type == STAR);
3900
3901 sz = get_size(p2);
3902 if (sz < 0)
3903 {
3904 return sz;
3905 }
3906
3907 left.origin = a1->origin;
3908 left.rn = a1->rn;
3909 left.spent = a1->spent;
3910
3911 offs = GET_OFFSET(p2);
3912 if (offs <= 0)
3913 {
3914 return -1;
3915 }
3916
3917 right.origin = a2->origin;
3918 right.rn = p2 + offs;
3919 right.spent = 0;
3920
3921 rv = compare(anchored, &left, &right);
3922 if (rv < 0)
3923 {
3924 return rv;
3925 }
3926
3927 if (rv == 0)
3928 {
3929 right.rn = p2 + 1;
3930 right.spent = 0;
3931
3932 rv = compare(anchored, a1, &right);
3933 if (rv < 0)
3934 {
3935 return rv;
3936 }
3937
3938 if (!rv)
3939 {
3940 return compare_mismatch(anchored, a1, a2);
3941 }
3942
3943 right.rn = p2;
3944 right.spent = 0;
3945
3946 if (!anchored)
3947 {
3948 rv = compare_right_star(1, a1, &right);
3949 }
3950 }
3951
3952 if (rv <= 0)
3953 {
3954 return rv;
3955 }
3956
3957 a2->rn += sz - 1;
3958 assert(a2->rn->type == END);
3959 a2->spent = 0;
3960
3961 return rv;
3962 }
3963
compare_plus_plus(int anchored,Arrow * a1,Arrow * a2)3964 static int compare_plus_plus(int anchored, Arrow *a1, Arrow *a2)
3965 {
3966 regnode *p1, *p2;
3967 Arrow left, right;
3968 int rv, offs;
3969
3970 p1 = a1->rn;
3971 assert(p1->type == PLUS);
3972 p2 = a2->rn;
3973 assert(p2->type == PLUS);
3974
3975 left.origin = a1->origin;
3976 left.rn = p1 + 1;
3977 left.spent = 0;
3978
3979 right.origin = a2->origin;
3980 right.rn = p2 + 1;
3981 right.spent = 0;
3982
3983 rv = compare(1, &left, &right);
3984 if (rv)
3985 {
3986 return rv;
3987 }
3988
3989 offs = GET_OFFSET(p1);
3990 /* fprintf(stderr, "offs = %d\n", offs); */
3991 if (offs <= 0)
3992 {
3993 return -1;
3994 }
3995
3996 left.origin = a1->origin;
3997 left.rn = p1 + offs;
3998 left.spent = 0;
3999 return compare(1, &left, a2);
4000 }
4001
compare_repeat_star(int anchored,Arrow * a1,Arrow * a2)4002 static int compare_repeat_star(int anchored, Arrow *a1, Arrow *a2)
4003 {
4004 regnode *p1, *p2;
4005 Arrow left, right;
4006 int rv, offs;
4007
4008 p1 = a1->rn;
4009 assert((p1->type == PLUS) || (p1->type == STAR));
4010 p2 = a2->rn;
4011 assert(p2->type == STAR);
4012 /* fprintf(stderr, "enter compare_repeat_star(%d, %d, %d)\n",
4013 anchored, p1->type, p2->type); */
4014
4015 left.origin = a1->origin;
4016 left.rn = p1 + 1;
4017 left.spent = 0;
4018
4019 right.origin = a2->origin;
4020 right.rn = p2 + 1;
4021 right.spent = 0;
4022
4023 rv = compare(1, &left, &right);
4024 /* fprintf(stderr, "inclusive compare returned %d\n", rv); */
4025 if (rv)
4026 {
4027 return rv;
4028 }
4029
4030 offs = GET_OFFSET(p2);
4031 /* fprintf(stderr, "offs = %d\n", offs); */
4032 if (offs <= 0)
4033 {
4034 return -1;
4035 }
4036
4037 right.origin = a2->origin;
4038 right.rn = p2 + offs;
4039 right.spent = 0;
4040 return compare(1, &left, &right);
4041 }
4042
compare_right_curly_from_zero(int anchored,Arrow * a1,Arrow * a2)4043 static int compare_right_curly_from_zero(int anchored, Arrow *a1, Arrow *a2)
4044 {
4045 regnode *p2, *alt;
4046 CurlyCount *cnt;
4047 Arrow left, right;
4048 int sz, rv, offs;
4049
4050 p2 = a2->rn;
4051
4052 sz = get_size(p2);
4053 if (sz < 0)
4054 {
4055 return sz;
4056 }
4057
4058 left.origin = a1->origin;
4059 left.rn = a1->rn;
4060 left.spent = a1->spent;
4061
4062 offs = GET_OFFSET(p2);
4063 if (offs <= 0)
4064 {
4065 return -1;
4066 }
4067
4068 right.origin = a2->origin;
4069 right.rn = p2 + offs;
4070 right.spent = 0;
4071
4072 rv = compare(anchored, &left, &right);
4073 if (rv < 0)
4074 {
4075 return rv;
4076 }
4077
4078 if (rv == 0)
4079 {
4080 alt = alloc_alt(p2, sz);
4081 if (!alt)
4082 {
4083 return -1;
4084 }
4085
4086 right.rn = alt + 2;
4087 right.spent = 0;
4088
4089 rv = compare(anchored, a1, &right);
4090 if (rv < 0)
4091 {
4092 free(alt);
4093 return rv;
4094 }
4095
4096 if (!rv)
4097 {
4098 free(alt);
4099 return compare_mismatch(anchored, a1, a2);
4100 }
4101
4102 cnt = (CurlyCount *)(alt + 1);
4103 if (cnt[1] < INFINITE_COUNT)
4104 {
4105 --cnt[1];
4106 }
4107
4108 if ((cnt[1] > 0) && !anchored)
4109 {
4110 right.rn = alt;
4111 right.spent = 0;
4112
4113 rv = compare_right_curly_from_zero(1, a1, &right);
4114 }
4115 else
4116 {
4117 rv = 1;
4118 }
4119
4120 free(alt);
4121 }
4122
4123 if (rv <= 0)
4124 {
4125 return rv;
4126 }
4127
4128 a2->rn += sz - 1;
4129 assert(a2->rn->type == END);
4130 a2->spent = 0;
4131
4132 return rv;
4133 }
4134
compare_left_plus(int anchored,Arrow * a1,Arrow * a2)4135 static int compare_left_plus(int anchored, Arrow *a1, Arrow *a2)
4136 {
4137 regnode *p1, *alt, *q;
4138 Arrow left, right;
4139 int sz, rv, offs, end_offs;
4140 unsigned char orig_type;
4141
4142 p1 = a1->rn;
4143 assert(p1->type == PLUS);
4144
4145 sz = get_size(p1);
4146 if (sz < 0)
4147 {
4148 return -1;
4149 }
4150
4151 if (sz < 2)
4152 {
4153 rc_error = "Left plus offset too small";
4154 return -1;
4155 }
4156
4157 alt = alloc_alt(p1 + 1, sz - 1);
4158 if (!alt)
4159 {
4160 return -1;
4161 }
4162
4163 if (anchored)
4164 {
4165 offs = get_jump_offset(p1);
4166 if (offs <= 0)
4167 {
4168 return -1;
4169 }
4170
4171 q = p1 + offs;
4172 if (q->type != END)
4173 {
4174 /* repeat with a tail after it can be more strict than a
4175 fixed-length match only if the tail is at least as
4176 strict as the repeated regexp */
4177 left.origin = a1->origin;
4178 left.rn = q;
4179 left.spent = 0;
4180
4181 end_offs = offs - 1;
4182 orig_type = alt[end_offs].type;
4183 alt[end_offs].type = END;
4184
4185 right.origin = a2->origin;
4186 right.rn = alt;
4187 right.spent = 0;
4188
4189 /* fprintf(stderr, "comparing %d to %d\n", left.rn->type,
4190 right.rn->type); */
4191 rv = compare(1, &left, &right);
4192 /* fprintf(stderr, "compare returned %d\n", rv); */
4193 if (rv <= 0)
4194 {
4195 free(alt);
4196 return rv;
4197 }
4198
4199 alt[end_offs].type = orig_type;
4200 }
4201 }
4202
4203 left.origin = a1->origin;
4204 left.rn = alt;
4205 left.spent = 0;
4206 rv = compare(anchored, &left, a2);
4207 free(alt);
4208 return rv;
4209 }
4210
compare_right_plus(int anchored,Arrow * a1,Arrow * a2)4211 static int compare_right_plus(int anchored, Arrow *a1, Arrow *a2)
4212 {
4213 regnode *p2;
4214 Arrow right;
4215 int sz, rv;
4216
4217 p2 = a2->rn;
4218 assert(p2->type == PLUS);
4219
4220 /* fprintf(stderr, "enter compare_right_plus\n"); */
4221
4222 sz = get_size(p2);
4223 if (sz < 0)
4224 {
4225 return -1;
4226 }
4227
4228 if (sz < 2)
4229 {
4230 rc_error = "Plus offset too small";
4231 return -1;
4232 }
4233
4234 /* fprintf(stderr, "sz = %d\n", sz); */
4235
4236 right.origin = a2->origin;
4237 right.rn = p2 + 1;
4238 right.spent = 0;
4239
4240 rv = compare(anchored, a1, &right);
4241
4242 if (rv < 0)
4243 {
4244 return rv;
4245 }
4246
4247 if (!rv)
4248 {
4249 return compare_mismatch(anchored, a1, a2);
4250 }
4251
4252 a2->rn += sz - 1;
4253 assert(a2->rn->type == END);
4254 a2->spent = 0;
4255
4256 return rv;
4257 }
4258
compare_next(int anchored,Arrow * a1,Arrow * a2)4259 static int compare_next(int anchored, Arrow *a1, Arrow *a2)
4260 {
4261 if (bump_regular(a2) <= 0)
4262 {
4263 return -1;
4264 }
4265
4266 return compare(anchored, a1, a2);
4267 }
4268
compare_curly_plus(int anchored,Arrow * a1,Arrow * a2)4269 static int compare_curly_plus(int anchored, Arrow *a1, Arrow *a2)
4270 {
4271 regnode *p1, *p2;
4272 Arrow left, right;
4273 CurlyCount *cnt;
4274
4275 p1 = a1->rn;
4276 assert((p1->type == CURLY) || (p1->type == CURLYM) ||
4277 (p1->type == CURLYX));
4278 p2 = a2->rn;
4279 assert(p2->type == PLUS);
4280
4281 cnt = (CurlyCount *)(p1 + 1);
4282
4283 if (!cnt[0])
4284 {
4285 return compare_mismatch(anchored, a1, a2);
4286 }
4287
4288 left.origin = a1->origin;
4289 left.rn = p1 + 2;
4290 left.spent = 0;
4291
4292 right.origin = a2->origin;
4293 right.rn = p2 + 1;
4294 right.spent = 0;
4295
4296 if (cnt[0] > 1)
4297 {
4298 anchored = 1;
4299 }
4300
4301 return compare(anchored, &left, &right);
4302 }
4303
compare_curly_star(int anchored,Arrow * a1,Arrow * a2)4304 static int compare_curly_star(int anchored, Arrow *a1, Arrow *a2)
4305 {
4306 regnode *p1, *p2;
4307 Arrow left, right;
4308 int rv;
4309
4310 p1 = a1->rn;
4311 assert((p1->type == CURLY) || (p1->type == CURLYM) ||
4312 (p1->type == CURLYX));
4313 p2 = a2->rn;
4314 assert(p2->type == STAR);
4315
4316 left.origin = a1->origin;
4317 left.rn = p1 + 2;
4318 left.spent = 0;
4319
4320 right.origin = a2->origin;
4321 right.rn = p2 + 1;
4322 right.spent = 0;
4323
4324 rv = compare(1, &left, &right);
4325 if (!rv)
4326 {
4327 rv = compare_next(anchored, a1, a2);
4328 }
4329
4330 return rv;
4331 }
4332
compare_plus_curly(int anchored,Arrow * a1,Arrow * a2)4333 static int compare_plus_curly(int anchored, Arrow *a1, Arrow *a2)
4334 {
4335 regnode *p1, *p2, *e2;
4336 Arrow left, right;
4337 CurlyCount *cnt;
4338 int rv, offs;
4339
4340 p1 = a1->rn;
4341 assert(p1->type == PLUS);
4342 p2 = a2->rn;
4343 assert((p2->type == CURLY) || (p2->type == CURLYM) ||
4344 (p2->type == CURLYX));
4345
4346 cnt = (CurlyCount *)(p2 + 1);
4347
4348 if (cnt[0] > 1) /* FIXME: fails '(?:aa)+' => 'a{2,}' */
4349 {
4350 return compare_mismatch(anchored, a1, a2);
4351 }
4352
4353 left.origin = a1->origin;
4354 left.rn = p1 + 1;
4355 left.spent = 0;
4356
4357 if (cnt[1] != INFINITE_COUNT)
4358 {
4359 offs = get_jump_offset(p2);
4360 if (offs <= 0)
4361 {
4362 return -1;
4363 }
4364
4365 e2 = p2 + offs;
4366 if (e2->type != END)
4367 {
4368 return compare_mismatch(anchored, a1, a2);
4369 }
4370 }
4371
4372 right.origin = a2->origin;
4373 right.rn = p2 + 2;
4374 right.spent = 0;
4375
4376 rv = compare(anchored, &left, &right);
4377 return (!rv && !cnt[0]) ? compare_next(anchored, a1, a2) : rv;
4378 }
4379
compare_suspend_curly(int anchored,Arrow * a1,Arrow * a2)4380 static int compare_suspend_curly(int anchored, Arrow *a1, Arrow *a2)
4381 {
4382 assert(a1->rn->type == SUSPEND);
4383 assert(!a1->spent);
4384
4385 a1->rn += 2;
4386
4387 return compare(1, a1, a2);
4388 }
4389
dec_curly_counts(CurlyCount * altcnt)4390 static void dec_curly_counts(CurlyCount *altcnt)
4391 {
4392 --altcnt[0];
4393 if (altcnt[1] < INFINITE_COUNT)
4394 {
4395 --altcnt[1];
4396 }
4397 }
4398
compare_left_curly(int anchored,Arrow * a1,Arrow * a2)4399 static int compare_left_curly(int anchored, Arrow *a1, Arrow *a2)
4400 {
4401 regnode *p1, *alt, *q;
4402 Arrow left, right;
4403 int sz, rv, offs, end_offs;
4404 CurlyCount *cnt;
4405
4406 /* fprintf(stderr, "enter compare_left_curly(%d, %d, %d)\n", anchored,
4407 a1->rn->type, a2->rn->type); */
4408
4409 p1 = a1->rn;
4410 assert((p1->type == CURLY) || (p1->type == CURLYM) ||
4411 (p1->type == CURLYX));
4412
4413 cnt = (CurlyCount *)(p1 + 1);
4414 if (!cnt[0])
4415 {
4416 /* fprintf(stderr, "curly from 0\n"); */
4417 return compare_mismatch(anchored, a1, a2);
4418 }
4419
4420 sz = get_size(p1);
4421 if (sz < 0)
4422 {
4423 return -1;
4424 }
4425
4426 if (sz < 3)
4427 {
4428 rc_error = "Left curly offset too small";
4429 return -1;
4430 }
4431
4432 if (cnt[0] > 1)
4433 {
4434 /* fprintf(stderr, "curly with non-trivial repeat count\n"); */
4435
4436 offs = GET_OFFSET(p1);
4437 if (offs < 0)
4438 {
4439 return -1;
4440 }
4441
4442 if (offs < 3)
4443 {
4444 rc_error = "Left curly offset is too small";
4445 return -1;
4446 }
4447
4448 alt = (regnode *)malloc(sizeof(regnode) * (offs - 2 + sz));
4449 if (!alt)
4450 {
4451 rc_error = "Could not allocate memory for unrolled curly";
4452 return -1;
4453 }
4454
4455 memcpy(alt, p1 + 2, (offs - 2) * sizeof(regnode));
4456 memcpy(alt + offs - 2, p1, sz * sizeof(regnode));
4457
4458 dec_curly_counts((CurlyCount *)(alt + offs - 1));
4459
4460 left.origin = a1->origin;
4461 left.rn = alt;
4462 left.spent = 0;
4463 rv = compare(1, &left, a2);
4464 free(alt);
4465 return rv;
4466 }
4467
4468 if (anchored && !((cnt[0] == 1) && (cnt[1] == 1)))
4469 {
4470 /* fprintf(stderr, "anchored curly with variable length\n"); */
4471
4472 alt = alloc_alt(p1 + 2, sz - 2);
4473 if (!alt)
4474 {
4475 return -1;
4476 }
4477
4478 offs = get_jump_offset(p1);
4479 if (offs <= 0)
4480 {
4481 return -1;
4482 }
4483
4484 q = p1 + offs;
4485 if (q->type != END)
4486 {
4487 /* repeat with a tail after it can be more strict than a
4488 fixed-length match only if the tail is at least as
4489 strict as the repeated regexp */
4490 left.origin = a1->origin;
4491 left.rn = q;
4492 left.spent = 0;
4493
4494 end_offs = offs - 1;
4495 alt[end_offs].type = END;
4496
4497 right.origin = a2->origin;
4498 right.rn = alt;
4499 right.spent = 0;
4500
4501 /* fprintf(stderr, "comparing %d to %d\n", left.rn->type,
4502 right.rn->type); */
4503 rv = compare(1, &left, &right);
4504 free(alt);
4505 /* fprintf(stderr, "compare returned %d\n", rv); */
4506 if (rv <= 0)
4507 {
4508 return rv;
4509 }
4510 }
4511 }
4512
4513 left.origin = a1->origin;
4514 left.rn = p1 + 2;
4515 left.spent = 0;
4516 return compare(anchored, &left, a2);
4517 }
4518
compare_right_curly(int anchored,Arrow * a1,Arrow * a2)4519 static int compare_right_curly(int anchored, Arrow *a1, Arrow *a2)
4520 {
4521 regnode *p2, *alt;
4522 Arrow right;
4523 CurlyCount *cnt, *altcnt;
4524 int sz, rv, offs, nanch;
4525
4526 /* fprintf(stderr, "enter compare_right_curly(%d...: a1->spent = %d, a2->spent = %d\n", anchored, a1->spent, a2->spent); */
4527
4528 p2 = a2->rn;
4529
4530 cnt = (CurlyCount *)(p2 + 1);
4531
4532 /* fprintf(stderr, "compare_right_curly: minimal repeat count = %d\n", cnt[0]); */
4533
4534 nanch = anchored;
4535
4536 if (cnt[0] > 0)
4537 {
4538 /* the repeated expression is mandatory: */
4539 sz = get_size(p2);
4540 if (sz < 0)
4541 {
4542 return sz;
4543 }
4544
4545 if (sz < 3)
4546 {
4547 rc_error = "Right curly offset too small";
4548 return -1;
4549 }
4550
4551 right.origin = a2->origin;
4552 right.rn = p2 + 2;
4553 right.spent = 0;
4554
4555 rv = compare(anchored, a1, &right);
4556 /* fprintf(stderr, "compare_right_curly: compare returned %d\n", rv); */
4557 if (rv < 0)
4558 {
4559 return rv;
4560 }
4561
4562 if (!rv)
4563 {
4564 /* ...or (if we aren't anchored yet) just do the left tail... */
4565 rv = compare_mismatch(anchored, a1, a2);
4566 if (rv)
4567 {
4568 return rv;
4569 }
4570
4571 /* ...or (last try) unroll the repeat (works for e.g.
4572 'abbc' vs. 'ab{2}c' */
4573 if (cnt[0] > 1)
4574 {
4575 offs = GET_OFFSET(p2);
4576 if (offs < 0)
4577 {
4578 return -1;
4579 }
4580
4581 if (offs < 3)
4582 {
4583 rc_error = "Left curly offset is too small";
4584 return -1;
4585 }
4586
4587 alt = (regnode *)malloc(sizeof(regnode) * (offs - 2 + sz));
4588 if (!alt)
4589 {
4590 rc_error = "Couldn't allocate memory for unrolled curly";
4591 return -1;
4592 }
4593
4594 memcpy(alt, p2 + 2, (offs - 2) * sizeof(regnode));
4595 memcpy(alt + offs - 2, p2, sz * sizeof(regnode));
4596
4597 dec_curly_counts((CurlyCount *)(alt + offs - 1));
4598
4599 right.origin = a2->origin;
4600 right.rn = alt;
4601 right.spent = 0;
4602
4603 rv = compare(anchored, a1, &right);
4604 free(alt);
4605 return rv;
4606 }
4607
4608 return 0;
4609 }
4610
4611 if (cnt[0] == 1)
4612 {
4613 return 1;
4614 }
4615
4616 if (a1->rn->type == END)
4617 {
4618 /* we presume the repeated argument matches something, which
4619 isn't guaranteed, but it is conservative */
4620 return 0;
4621 }
4622
4623 /* strictly speaking, matching one repeat didn't *necessarily*
4624 anchor the match, but we'll ignore such cases as
4625 pathological */
4626 nanch = 1;
4627
4628 alt = alloc_alt(p2, sz);
4629 if (!alt)
4630 {
4631 return -1;
4632 }
4633
4634 altcnt = (CurlyCount *)(alt + 1);
4635 dec_curly_counts(altcnt);
4636 if (altcnt[1] > 0)
4637 {
4638 right.origin = a2->origin;
4639 right.rn = alt;
4640 right.spent = 0;
4641
4642 rv = compare_right_curly(nanch, a1, &right);
4643 }
4644 else
4645 {
4646 rv = 1;
4647 }
4648
4649 free(alt);
4650
4651 if (rv <= 0)
4652 {
4653 return rv;
4654 }
4655
4656 a2->rn += sz - 1;
4657 assert(a2->rn->type == END);
4658 a2->spent = 0;
4659 return rv;
4660 }
4661
4662 return compare_right_curly_from_zero(nanch, a1, a2);
4663 }
4664
compare_curly_curly(int anchored,Arrow * a1,Arrow * a2)4665 static int compare_curly_curly(int anchored, Arrow *a1, Arrow *a2)
4666 {
4667 regnode *p1, *p2, *e2;
4668 Arrow left, right;
4669 CurlyCount *cnt1, *cnt2;
4670 int rv, offs;
4671
4672 /* fprintf(stderr, "enter compare_curly_curly(%d...)\n", anchored); */
4673
4674 p1 = a1->rn;
4675 assert((p1->type == CURLY) || (p1->type == CURLYM) ||
4676 (p1->type == CURLYX));
4677 p2 = a2->rn;
4678 assert((p2->type == CURLY) || (p2->type == CURLYM) ||
4679 (p2->type == CURLYX));
4680
4681 cnt1 = (CurlyCount *)(p1 + 1);
4682
4683 cnt2 = (CurlyCount *)(p2 + 1);
4684
4685 if (cnt2[0] > cnt1[0]) /* FIXME: fails '(?:aa){1,}' => 'a{2,}' */
4686 {
4687 /* fprintf(stderr, "curly mismatch\n"); */
4688 return compare_mismatch(anchored, a1, a2);
4689 }
4690
4691 left.origin = a1->origin;
4692 left.rn = p1 + 2;
4693 left.spent = 0;
4694
4695 if (cnt1[1] > cnt2[1])
4696 {
4697 offs = get_jump_offset(p2);
4698 /* fprintf(stderr, "offs = %d\n", offs); */
4699 if (offs <= 0)
4700 {
4701 return -1;
4702 }
4703
4704 e2 = p2 + offs;
4705 /* fprintf(stderr, "e2->type = %d\n", e2->type); */
4706 if (e2->type != END)
4707 {
4708 return compare_mismatch(anchored, a1, a2);
4709 }
4710 }
4711
4712 right.origin = a2->origin;
4713 right.rn = p2 + 2;
4714 right.spent = 0;
4715
4716 /* fprintf(stderr, "comparing tails\n"); */
4717
4718 rv = compare(anchored, &left, &right);
4719 /* fprintf(stderr, "tail compare returned %d\n", rv); */
4720 return (!rv && !cnt2[0]) ? compare_next(anchored, a1, a2) : rv;
4721 }
4722
compare_bound(int anchored,Arrow * a1,Arrow * a2,int move_left,unsigned char * bitmap,char * lookup,unsigned char * oktypes,unsigned char * regclasses,U32 regclasses_size)4723 static int compare_bound(int anchored, Arrow *a1, Arrow *a2,
4724 int move_left, unsigned char *bitmap, char *lookup,
4725 unsigned char *oktypes,
4726 unsigned char *regclasses, U32 regclasses_size)
4727 {
4728 Arrow left, right;
4729 unsigned char t;
4730 int i;
4731 char *seq;
4732
4733 assert((a2->rn->type == BOUND) || (a2->rn->type == NBOUND));
4734
4735 left = *a1;
4736
4737 if (bump_with_check(&left) <= 0)
4738 {
4739 return -1;
4740 }
4741
4742 t = left.rn->type;
4743 if (t >= REGNODE_MAX)
4744 {
4745 rc_error = "Invalid node type";
4746 return -1;
4747 }
4748 else if (t == ANYOF)
4749 {
4750 /* fprintf(stderr, "next is bitmap; flags = 0x%x\n", left.rn->flags); */
4751
4752 if (left.rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
4753 {
4754 return compare_mismatch(anchored, a1, a2);
4755 }
4756
4757 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
4758 {
4759 if (get_bitmap_byte(left.rn, i) & ~bitmap[i])
4760 {
4761 return compare_mismatch(anchored, a1, a2);
4762 }
4763 }
4764 }
4765 #ifdef RC_ANYOFR
4766 else if (t == ANYOFR)
4767 {
4768 unsigned char left_bitmap[ANYOF_BITMAP_SIZE];
4769
4770 if (!convert_anyofr_to_bitmap(&left, left_bitmap))
4771 {
4772 return compare_mismatch(anchored, a1, a2);
4773 }
4774
4775 for (i = 0; i < ANYOF_BITMAP_SIZE; ++i)
4776 {
4777 if (left_bitmap[i] & ~bitmap[i])
4778 {
4779 return compare_mismatch(anchored, a1, a2);
4780 }
4781 }
4782 }
4783 #endif
4784 else if ((t == EXACT) || (t == EXACTF) || (t == EXACTFU))
4785 {
4786 seq = GET_LITERAL(&left);
4787 if (!lookup[(unsigned char)(*seq)])
4788 {
4789 return compare_mismatch(anchored, a1, a2);
4790 }
4791 }
4792 else if ((t == POSIXD) || (t == NPOSIXD) || (t == POSIXU) || (t == NPOSIXU))
4793 {
4794 U8 flags = left.rn->flags;
4795 if ((flags >= regclasses_size) || !regclasses[flags])
4796 {
4797 return compare_mismatch(anchored, a1, a2);
4798 }
4799 }
4800 else if (!oktypes[t])
4801 {
4802 return compare_mismatch(anchored, a1, a2);
4803 }
4804
4805 right = *a2;
4806 if (bump_with_check(&right) <= 0)
4807 {
4808 return -1;
4809 }
4810
4811 return move_left ? compare(1, &left, &right) :
4812 compare(anchored, a1, &right);
4813 }
4814
compare_bol_word(int anchored,Arrow * a1,Arrow * a2)4815 static int compare_bol_word(int anchored, Arrow *a1, Arrow *a2)
4816 {
4817 return compare_bound(anchored, a1, a2, 1, word_bc.bitmap,
4818 word_bc.lookup, alphanumeric_classes,
4819 word_posix_regclasses, SIZEOF_ARRAY(word_posix_regclasses));
4820 }
4821
compare_bol_nword(int anchored,Arrow * a1,Arrow * a2)4822 static int compare_bol_nword(int anchored, Arrow *a1, Arrow *a2)
4823 {
4824 return compare_bound(anchored, a1, a2, 1, word_bc.nbitmap,
4825 word_bc.nlookup, non_alphanumeric_classes,
4826 non_word_posix_regclasses, SIZEOF_ARRAY(non_word_posix_regclasses));
4827 }
4828
compare_next_word(int anchored,Arrow * a1,Arrow * a2)4829 static int compare_next_word(int anchored, Arrow *a1, Arrow *a2)
4830 {
4831 return compare_bound(anchored, a1, a2, 0, word_bc.bitmap,
4832 word_bc.lookup, alphanumeric_classes,
4833 word_posix_regclasses, SIZEOF_ARRAY(word_posix_regclasses));
4834 }
4835
compare_next_nword(int anchored,Arrow * a1,Arrow * a2)4836 static int compare_next_nword(int anchored, Arrow *a1, Arrow *a2)
4837 {
4838 return compare_bound(anchored, a1, a2, 0, word_bc.nbitmap,
4839 word_bc.nlookup, non_alphanumeric_classes,
4840 non_word_posix_regclasses, SIZEOF_ARRAY(non_word_posix_regclasses));
4841 }
4842
compare_anyof_bounds(int anchored,Arrow * a1,Arrow * a2,unsigned char * bitmap1,unsigned char * bitmap2)4843 static int compare_anyof_bounds(int anchored, Arrow *a1, Arrow *a2,
4844 unsigned char *bitmap1, unsigned char *bitmap2)
4845 {
4846 unsigned char loc;
4847 FCompare cmp[2];
4848 int i;
4849
4850 cmp[0] = compare_next_word;
4851 cmp[1] = compare_next_nword;
4852 for (i = 0; (i < ANYOF_BITMAP_SIZE) && (cmp[0] || cmp[1]); ++i)
4853 {
4854 loc = bitmap1 ? bitmap1[i] : get_bitmap_byte(a1->rn, i);
4855
4856 if (loc & ~bitmap2[i])
4857 {
4858 cmp[0] = 0;
4859 }
4860
4861 if (loc & bitmap2[i])
4862 {
4863 cmp[1] = 0;
4864 }
4865 }
4866
4867 if (cmp[0] && cmp[1])
4868 {
4869 rc_error = "Zero bitmap";
4870 return -1;
4871 }
4872
4873 for (i = 0; i < SIZEOF_ARRAY(cmp); ++i)
4874 {
4875 if (cmp[i])
4876 {
4877 return (cmp[i])(anchored, a1, a2);
4878 }
4879 }
4880
4881 /* if would be more elegant to use compare_mismatch as a sentinel
4882 in cmp, but VC 2003 then warns that this function might be
4883 missing a return... */
4884 return compare_mismatch(anchored, a1, a2);
4885 }
4886
compare_anyof_bound(int anchored,Arrow * a1,Arrow * a2)4887 static int compare_anyof_bound(int anchored, Arrow *a1, Arrow *a2)
4888 {
4889 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
4890 assert(a2->rn->type == BOUND);
4891
4892 if (a1->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
4893 {
4894 return compare_mismatch(anchored, a1, a2);
4895 }
4896
4897 return compare_anyof_bounds(anchored, a1, a2, 0, word_bc.nbitmap);
4898 }
4899
compare_anyof_nbound(int anchored,Arrow * a1,Arrow * a2)4900 static int compare_anyof_nbound(int anchored, Arrow *a1, Arrow *a2)
4901 {
4902 assert((a1->rn->type == ANYOF) || (a1->rn->type == ANYOFD));
4903 assert(a2->rn->type == NBOUND);
4904
4905 if (a1->rn->flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
4906 {
4907 return compare_mismatch(anchored, a1, a2);
4908 }
4909
4910 return compare_anyof_bounds(anchored, a1, a2, 0, word_bc.bitmap);
4911 }
4912
4913 #ifdef RC_ANYOFR
compare_anyofr_bound(int anchored,Arrow * a1,Arrow * a2)4914 static int compare_anyofr_bound(int anchored, Arrow *a1, Arrow *a2)
4915 {
4916 unsigned char left[ANYOF_BITMAP_SIZE];
4917
4918 assert(a1->rn->type == ANYOFR);
4919 assert(a2->rn->type == BOUND);
4920
4921 if (!convert_anyofr_to_bitmap(a1, left))
4922 {
4923 return compare_mismatch(anchored, a1, a2);
4924 }
4925
4926 return compare_anyof_bounds(anchored, a1, a2, left, word_bc.nbitmap);
4927 }
4928
compare_anyofr_nbound(int anchored,Arrow * a1,Arrow * a2)4929 static int compare_anyofr_nbound(int anchored, Arrow *a1, Arrow *a2)
4930 {
4931 unsigned char left[ANYOF_BITMAP_SIZE];
4932
4933 assert(a1->rn->type == ANYOFR);
4934 assert(a2->rn->type == NBOUND);
4935
4936 if (!convert_anyofr_to_bitmap(a1, left))
4937 {
4938 return compare_mismatch(anchored, a1, a2);
4939 }
4940
4941 return compare_anyof_bounds(anchored, a1, a2, left, word_bc.bitmap);
4942 }
4943 #endif
4944
compare_anyofm_bound(int anchored,Arrow * a1,Arrow * a2)4945 static int compare_anyofm_bound(int anchored, Arrow *a1, Arrow *a2)
4946 {
4947 unsigned char left[ANYOF_BITMAP_SIZE];
4948
4949 assert(a1->rn->type == ANYOFM);
4950 assert(a2->rn->type == BOUND);
4951
4952 if (!convert_anyofm_to_bitmap(a1, left))
4953 {
4954 return compare_mismatch(anchored, a1, a2);
4955 }
4956
4957 return compare_anyof_bounds(anchored, a1, a2, left, word_bc.nbitmap);
4958 }
4959
compare_anyofm_nbound(int anchored,Arrow * a1,Arrow * a2)4960 static int compare_anyofm_nbound(int anchored, Arrow *a1, Arrow *a2)
4961 {
4962 unsigned char left[ANYOF_BITMAP_SIZE];
4963
4964 assert(a1->rn->type == ANYOFM);
4965 assert(a2->rn->type == NBOUND);
4966
4967 if (!convert_anyofm_to_bitmap(a1, left))
4968 {
4969 return compare_mismatch(anchored, a1, a2);
4970 }
4971
4972 return compare_anyof_bounds(anchored, a1, a2, left, word_bc.bitmap);
4973 }
4974
compare_exact_bound(int anchored,Arrow * a1,Arrow * a2)4975 static int compare_exact_bound(int anchored, Arrow *a1, Arrow *a2)
4976 {
4977 char *seq;
4978 FCompare cmp;
4979
4980 assert((a1->rn->type == EXACT) || (a1->rn->type == EXACTF) ||
4981 (a1->rn->type == EXACTFU));
4982 assert(a2->rn->type == BOUND);
4983
4984 seq = GET_LITERAL(a1);
4985
4986 cmp = word_bc.lookup[(unsigned char)(*seq)] ?
4987 compare_next_nword : compare_next_word;
4988 return cmp(anchored, a1, a2);
4989 }
4990
compare_exact_nbound(int anchored,Arrow * a1,Arrow * a2)4991 static int compare_exact_nbound(int anchored, Arrow *a1, Arrow *a2)
4992 {
4993 char *seq;
4994 FCompare cmp;
4995
4996 assert((a1->rn->type == EXACT) || (a1->rn->type == EXACTF) ||
4997 (a1->rn->type == EXACTFU));
4998 assert(a2->rn->type == NBOUND);
4999
5000 seq = GET_LITERAL(a1);
5001
5002 cmp = word_bc.lookup[(unsigned char)(*seq)] ?
5003 compare_next_word : compare_next_nword;
5004 return cmp(anchored, a1, a2);
5005 }
5006
compare_posix_bound(int anchored,Arrow * a1,Arrow * a2)5007 static int compare_posix_bound(int anchored, Arrow *a1, Arrow *a2)
5008 {
5009 assert((a1->rn->type == POSIXD) || (a1->rn->type == POSIXU) ||
5010 (a1->rn->type == POSIXA));
5011 assert(a2->rn->type == BOUND);
5012
5013 U8 flags = a1->rn->flags;
5014 if ((flags >= SIZEOF_ARRAY(word_posix_regclasses)) ||
5015 (flags >= SIZEOF_ARRAY(non_word_posix_regclasses)) ||
5016 (!word_posix_regclasses[flags] && !non_word_posix_regclasses[flags]))
5017 {
5018 return compare_mismatch(anchored, a1, a2);
5019 }
5020
5021 assert(!word_posix_regclasses[flags] || !non_word_posix_regclasses[flags]);
5022
5023 FCompare cmp = word_posix_regclasses[flags] ?
5024 compare_next_nword : compare_next_word;
5025 return cmp(anchored, a1, a2);
5026 }
5027
compare_posix_nbound(int anchored,Arrow * a1,Arrow * a2)5028 static int compare_posix_nbound(int anchored, Arrow *a1, Arrow *a2)
5029 {
5030 assert((a1->rn->type == POSIXD) || (a1->rn->type == POSIXU) ||
5031 (a1->rn->type == POSIXA));
5032 assert(a2->rn->type == NBOUND);
5033
5034 U8 flags = a1->rn->flags;
5035 if ((flags >= SIZEOF_ARRAY(word_posix_regclasses)) ||
5036 (flags >= SIZEOF_ARRAY(non_word_posix_regclasses)) ||
5037 (!word_posix_regclasses[flags] && !non_word_posix_regclasses[flags]))
5038 {
5039 return compare_mismatch(anchored, a1, a2);
5040 }
5041
5042 assert(!word_posix_regclasses[flags] || !non_word_posix_regclasses[flags]);
5043
5044 FCompare cmp = word_posix_regclasses[flags] ?
5045 compare_next_word : compare_next_nword;
5046 return cmp(anchored, a1, a2);
5047 }
5048
compare_negative_posix_word_bound(int anchored,Arrow * a1,Arrow * a2)5049 static int compare_negative_posix_word_bound(int anchored, Arrow *a1, Arrow *a2)
5050 {
5051 assert((a1->rn->type == NPOSIXD) || (a1->rn->type == NPOSIXU) ||
5052 (a1->rn->type == NPOSIXA));
5053 assert(a2->rn->type == BOUND);
5054
5055 /* we could accept _CC_ALPHANUMERIC as well but let's postpone it
5056 until we see the need */
5057 if (a1->rn->flags != _CC_WORDCHAR)
5058 {
5059 return compare_mismatch(anchored, a1, a2);
5060 }
5061
5062 return compare_next_word(anchored, a1, a2);
5063 }
5064
compare_negative_posix_word_nbound(int anchored,Arrow * a1,Arrow * a2)5065 static int compare_negative_posix_word_nbound(int anchored, Arrow *a1, Arrow *a2)
5066 {
5067 assert((a1->rn->type == NPOSIXD) || (a1->rn->type == NPOSIXU) ||
5068 (a1->rn->type == NPOSIXA));
5069 assert(a2->rn->type == NBOUND);
5070
5071 /* we could accept _CC_ALPHANUMERIC as well but let's postpone it
5072 until we see the need */
5073 if (a1->rn->flags != _CC_WORDCHAR)
5074 {
5075 return compare_mismatch(anchored, a1, a2);
5076 }
5077
5078 return compare_next_nword(anchored, a1, a2);
5079 }
5080
compare_open_open(int anchored,Arrow * a1,Arrow * a2)5081 static int compare_open_open(int anchored, Arrow *a1, Arrow *a2)
5082 {
5083 return compare_tails(anchored, a1, a2);
5084 }
5085
compare_left_open(int anchored,Arrow * a1,Arrow * a2)5086 static int compare_left_open(int anchored, Arrow *a1, Arrow *a2)
5087 {
5088 return compare_left_tail(anchored, a1, a2);
5089 }
5090
compare_right_open(int anchored,Arrow * a1,Arrow * a2)5091 static int compare_right_open(int anchored, Arrow *a1, Arrow *a2)
5092 {
5093 return compare_next(anchored, a1, a2);
5094 }
5095
success(int anchored,Arrow * a1,Arrow * a2)5096 static int success(int anchored, Arrow *a1, Arrow *a2)
5097 {
5098 return 1;
5099 }
5100
5101 /* #define DEBUG_dump */
5102
rc_compare(REGEXP * pt1,REGEXP * pt2)5103 int rc_compare(REGEXP *pt1, REGEXP *pt2)
5104 {
5105 Arrow a1, a2;
5106 regnode *p1, *p2;
5107 #ifdef DEBUG_dump
5108 unsigned char *p;
5109 int i;
5110 #endif
5111
5112 a1.origin = SvANY(pt1);
5113 a2.origin = SvANY(pt2);
5114
5115 if ((get_forced_semantics(pt1) | get_forced_semantics(pt2)) == FORCED_MISMATCH)
5116 {
5117 return 0;
5118 }
5119
5120 p1 = find_internal(a1.origin);
5121 if (!p1)
5122 {
5123 return -1;
5124 }
5125
5126 p2 = find_internal(a2.origin);
5127 if (!p2)
5128 {
5129 return -1;
5130 }
5131
5132 #ifdef DEBUG_dump
5133 p = (unsigned char *)p1;
5134 for (i = 1; i <= 64; ++i)
5135 {
5136 fprintf(stderr, " %02x", (int)p[i - 1]);
5137 if (!(i % 4))
5138 {
5139 fprintf(stderr, "\n");
5140 }
5141 }
5142
5143 fprintf(stderr, "\n\n");
5144
5145 p = (unsigned char *)p2;
5146 for (i = 1; i <= 64; ++i)
5147 {
5148 fprintf(stderr, " %02x", (int)p[i - 1]);
5149 if (!(i % 4))
5150 {
5151 fprintf(stderr, "\n");
5152 }
5153 }
5154
5155 fprintf(stderr, "\n\n");
5156 #endif
5157
5158 a1.rn = p1;
5159 a1.spent = 0;
5160 a2.rn = p2;
5161 a2.spent = 0;
5162
5163 return compare(0, &a1, &a2);
5164 }
5165
compare(int anchored,Arrow * a1,Arrow * a2)5166 static int compare(int anchored, Arrow *a1, Arrow *a2)
5167 {
5168 FCompare cmp;
5169
5170 /* fprintf(stderr, "enter compare(%d, %d, %d)\n", anchored,
5171 a1->rn->type, a2->rn->type); */
5172
5173 if ((a1->rn->type >= REGNODE_MAX) || (a2->rn->type >= REGNODE_MAX))
5174 {
5175 rc_error = "Invalid regexp node type";
5176 return -1;
5177 }
5178
5179 cmp = dispatch[a1->rn->type][a2->rn->type];
5180 if (!cmp)
5181 {
5182 /* fprintf(stderr, "no comparator\n"); */
5183 return 0;
5184 }
5185
5186 return cmp(anchored, a1, a2);
5187 }
5188
rc_init()5189 void rc_init()
5190 {
5191 int i, wstart;
5192
5193 /* could have used compile-time assertion, but why bother
5194 making it compatible... */
5195 assert(ANYOF_BITMAP_SIZE == 32);
5196
5197 init_forced_byte();
5198
5199 init_byte_class(&whitespace, whitespace_expl,
5200 SIZEOF_ARRAY(whitespace_expl));
5201 init_byte_class(&horizontal_whitespace, horizontal_whitespace_expl,
5202 SIZEOF_ARRAY(horizontal_whitespace_expl));
5203 init_byte_class(&vertical_whitespace, vertical_whitespace_expl,
5204 SIZEOF_ARRAY(vertical_whitespace_expl));
5205
5206 for (i = 0; i < SIZEOF_ARRAY(digit_expl); ++i)
5207 {
5208 digit_expl[i] = '0' + i;
5209 }
5210
5211 init_byte_class(&digit, digit_expl, SIZEOF_ARRAY(digit_expl));
5212
5213 memcpy(xdigit_expl, digit_expl, 10 * sizeof(char));
5214
5215 wstart = 10;
5216 for (i = 0; i < 6; ++i)
5217 {
5218 xdigit_expl[wstart + i] = 'a' + i;
5219 }
5220
5221 wstart += 6;
5222 for (i = 0; i < 6; ++i)
5223 {
5224 xdigit_expl[wstart + i] = 'A' + i;
5225 }
5226
5227 init_byte_class(&xdigit, xdigit_expl, SIZEOF_ARRAY(xdigit_expl));
5228
5229 init_byte_class(&ndot, ndot_expl, SIZEOF_ARRAY(ndot_expl));
5230
5231 alphanumeric_expl[0] = '_';
5232
5233 wstart = 1;
5234 memcpy(alphanumeric_expl + wstart, digit_expl, 10 * sizeof(char));
5235
5236 wstart += 10;
5237 for (i = 0; i < LETTER_COUNT; ++i)
5238 {
5239 alphanumeric_expl[wstart + i] = 'a' + i;
5240 }
5241
5242 wstart += LETTER_COUNT;
5243 for (i = 0; i < LETTER_COUNT; ++i)
5244 {
5245 alphanumeric_expl[wstart + i] = 'A' + i;
5246 }
5247
5248 init_byte_class(&word_bc, alphanumeric_expl,
5249 SIZEOF_ARRAY(alphanumeric_expl));
5250 init_byte_class(&alnum_bc, alphanumeric_expl + 1,
5251 SIZEOF_ARRAY(alphanumeric_expl) - 1);
5252
5253 for (i = 0; i < LETTER_COUNT; ++i)
5254 {
5255 alpha_expl[i] = lower_expl[i] = 'a' + i;
5256 }
5257
5258 wstart = LETTER_COUNT;
5259 for (i = 0; i < LETTER_COUNT; ++i)
5260 {
5261 alpha_expl[wstart + i] = upper_expl[i] = 'A' + i;
5262 }
5263
5264 init_byte_class(&alpha_bc, alpha_expl,
5265 SIZEOF_ARRAY(alpha_expl));
5266 init_byte_class(&lower_bc, lower_expl,
5267 SIZEOF_ARRAY(lower_expl));
5268 init_byte_class(&upper_bc, upper_expl,
5269 SIZEOF_ARRAY(upper_expl));
5270
5271 memset(alphanumeric_classes, 0, SIZEOF_ARRAY(alphanumeric_classes));
5272
5273 memset(non_alphanumeric_classes, 0,
5274 SIZEOF_ARRAY(non_alphanumeric_classes));
5275 non_alphanumeric_classes[EOS] = non_alphanumeric_classes[EOL] =
5276 non_alphanumeric_classes[SEOL] = 1;
5277
5278 posix_regclass_blocks[_CC_VERTSPACE] = VERTICAL_SPACE_BLOCK;
5279 posix_regclass_bitmaps[_CC_VERTSPACE] = vertical_whitespace.bitmap;
5280 posix_regclass_nbitmaps[_CC_VERTSPACE] = vertical_whitespace.nbitmap;
5281
5282 memset(word_posix_regclasses, 0,
5283 SIZEOF_ARRAY(word_posix_regclasses));
5284 word_posix_regclasses[_CC_WORDCHAR] =
5285 word_posix_regclasses[_CC_DIGIT] =
5286 word_posix_regclasses[_CC_ALPHA] =
5287 word_posix_regclasses[_CC_LOWER] =
5288 word_posix_regclasses[_CC_UPPER] =
5289 word_posix_regclasses[_CC_UPPER] =
5290 word_posix_regclasses[_CC_ALPHANUMERIC] =
5291 word_posix_regclasses[_CC_CASED] =
5292 word_posix_regclasses[_CC_XDIGIT] = 1;
5293
5294 memset(non_word_posix_regclasses, 0,
5295 SIZEOF_ARRAY(non_word_posix_regclasses));
5296 non_word_posix_regclasses[_CC_PUNCT] =
5297 non_word_posix_regclasses[_CC_SPACE] =
5298 non_word_posix_regclasses[_CC_BLANK] =
5299 non_word_posix_regclasses[_CC_VERTSPACE] = 1;
5300
5301 memset(newline_posix_regclasses, 0,
5302 SIZEOF_ARRAY(newline_posix_regclasses));
5303 newline_posix_regclasses[_CC_SPACE] =
5304 newline_posix_regclasses[_CC_CNTRL] =
5305 newline_posix_regclasses[_CC_ASCII] =
5306 newline_posix_regclasses[_CC_VERTSPACE] = 1;
5307
5308 memset(trivial_nodes, 0, SIZEOF_ARRAY(trivial_nodes));
5309 trivial_nodes[SUCCEED] = trivial_nodes[NOTHING] =
5310 trivial_nodes[TAIL] = trivial_nodes[WHILEM] = 1;
5311
5312 memset(dispatch, 0, sizeof(FCompare) * REGNODE_MAX * REGNODE_MAX);
5313
5314 for (i = 0; i < REGNODE_MAX; ++i)
5315 {
5316 dispatch[i][END] = success;
5317 }
5318
5319 for (i = 0; i < REGNODE_MAX; ++i)
5320 {
5321 dispatch[i][SUCCEED] = compare_next;
5322 }
5323
5324 dispatch[SUCCEED][SUCCEED] = compare_tails;
5325
5326 dispatch[SUCCEED][MBOL] = compare_left_tail;
5327 dispatch[MBOL][MBOL] = compare_tails;
5328 dispatch[SBOL][MBOL] = compare_tails;
5329 dispatch[REG_ANY][MBOL] = compare_mismatch;
5330 dispatch[SANY][MBOL] = compare_mismatch;
5331 dispatch[ANYOF][MBOL] = compare_anyof_multiline;
5332 dispatch[ANYOFD][MBOL] = compare_anyof_multiline;
5333 #ifdef RC_ANYOFR
5334 dispatch[ANYOFR][MBOL] = compare_anyofr_multiline;
5335 #endif
5336 dispatch[ANYOFM][MBOL] = compare_anyofm_multiline;
5337 dispatch[NANYOFM][MBOL] = compare_nanyofm_multiline;
5338 dispatch[POSIXD][MBOL] = compare_mismatch;
5339 dispatch[POSIXU][MBOL] = compare_mismatch;
5340 dispatch[POSIXA][MBOL] = compare_mismatch;
5341 dispatch[NPOSIXD][MBOL] = compare_mismatch;
5342 dispatch[NPOSIXU][MBOL] = compare_mismatch;
5343 dispatch[NPOSIXA][MBOL] = compare_mismatch;
5344 dispatch[BRANCH][MBOL] = compare_left_branch;
5345 dispatch[EXACT][MBOL] = compare_exact_multiline;
5346 dispatch[EXACTF][MBOL] = compare_exact_multiline;
5347 dispatch[EXACTFU][MBOL] = compare_exact_multiline;
5348 dispatch[NOTHING][MBOL] = compare_left_tail;
5349 dispatch[TAIL][MBOL] = compare_left_tail;
5350 dispatch[STAR][MBOL] = compare_mismatch;
5351 dispatch[PLUS][MBOL] = compare_left_plus;
5352 dispatch[CURLY][MBOL] = compare_left_curly;
5353 dispatch[CURLYM][MBOL] = compare_left_curly;
5354 dispatch[CURLYX][MBOL] = compare_left_curly;
5355 dispatch[WHILEM][MBOL] = compare_left_tail;
5356 dispatch[OPEN][MBOL] = compare_left_open;
5357 dispatch[CLOSE][MBOL] = compare_left_tail;
5358 dispatch[IFMATCH][MBOL] = compare_after_assertion;
5359 dispatch[UNLESSM][MBOL] = compare_after_assertion;
5360 dispatch[MINMOD][MBOL] = compare_left_tail;
5361 dispatch[LNBREAK][MBOL] = compare_tails;
5362 dispatch[OPTIMIZED][MBOL] = compare_left_tail;
5363
5364 dispatch[SUCCEED][SBOL] = compare_left_tail;
5365 dispatch[SBOL][SBOL] = compare_tails;
5366 dispatch[BRANCH][SBOL] = compare_left_branch;
5367 dispatch[NOTHING][SBOL] = compare_left_tail;
5368 dispatch[TAIL][SBOL] = compare_left_tail;
5369 dispatch[STAR][SBOL] = compare_mismatch;
5370 dispatch[PLUS][SBOL] = compare_left_plus;
5371 dispatch[CURLY][SBOL] = compare_left_curly;
5372 dispatch[CURLYM][SBOL] = compare_left_curly;
5373 dispatch[CURLYX][SBOL] = compare_left_curly;
5374 dispatch[WHILEM][SBOL] = compare_left_tail;
5375 dispatch[OPEN][SBOL] = compare_left_open;
5376 dispatch[CLOSE][SBOL] = compare_left_tail;
5377 dispatch[IFMATCH][SBOL] = compare_after_assertion;
5378 dispatch[UNLESSM][SBOL] = compare_after_assertion;
5379 dispatch[MINMOD][SBOL] = compare_left_tail;
5380 dispatch[OPTIMIZED][SBOL] = compare_left_tail;
5381
5382 dispatch[SUCCEED][EOS] = compare_left_tail;
5383 dispatch[EOS][EOS] = compare_tails;
5384 dispatch[EOL][EOS] = compare_mismatch;
5385 dispatch[SEOL][EOS] = compare_mismatch;
5386 dispatch[BRANCH][EOS] = compare_left_branch;
5387 dispatch[NOTHING][EOS] = compare_left_tail;
5388 dispatch[TAIL][EOS] = compare_left_tail;
5389 dispatch[STAR][EOS] = compare_mismatch;
5390 dispatch[PLUS][EOS] = compare_left_plus;
5391 dispatch[CURLY][EOS] = compare_left_curly;
5392 dispatch[CURLYM][EOS] = compare_left_curly;
5393 dispatch[CURLYX][EOS] = compare_left_curly;
5394 dispatch[WHILEM][EOS] = compare_left_tail;
5395 dispatch[OPEN][EOS] = compare_left_open;
5396 dispatch[CLOSE][EOS] = compare_left_tail;
5397 dispatch[IFMATCH][EOS] = compare_after_assertion;
5398 dispatch[UNLESSM][EOS] = compare_after_assertion;
5399 dispatch[MINMOD][EOS] = compare_left_tail;
5400 dispatch[OPTIMIZED][EOS] = compare_left_tail;
5401
5402 dispatch[SUCCEED][EOL] = compare_left_tail;
5403 dispatch[EOS][EOL] = compare_tails;
5404 dispatch[EOL][EOL] = compare_tails;
5405 dispatch[SEOL][EOL] = compare_tails;
5406 dispatch[BRANCH][EOL] = compare_left_branch;
5407 dispatch[NOTHING][EOL] = compare_left_tail;
5408 dispatch[TAIL][EOL] = compare_left_tail;
5409 dispatch[STAR][EOL] = compare_mismatch;
5410 dispatch[PLUS][EOL] = compare_left_plus;
5411 dispatch[CURLY][EOL] = compare_left_curly;
5412 dispatch[CURLYM][EOL] = compare_left_curly;
5413 dispatch[CURLYX][EOL] = compare_left_curly;
5414 dispatch[WHILEM][EOL] = compare_left_tail;
5415 dispatch[OPEN][EOL] = compare_left_open;
5416 dispatch[CLOSE][EOL] = compare_left_tail;
5417 dispatch[IFMATCH][EOL] = compare_after_assertion;
5418 dispatch[UNLESSM][EOL] = compare_after_assertion;
5419 dispatch[MINMOD][EOL] = compare_left_tail;
5420 dispatch[OPTIMIZED][EOL] = compare_left_tail;
5421
5422 dispatch[SUCCEED][MEOL] = compare_left_tail;
5423 dispatch[EOS][MEOL] = compare_tails;
5424 dispatch[EOL][MEOL] = compare_tails;
5425 dispatch[MEOL][MEOL] = compare_tails;
5426 dispatch[SEOL][MEOL] = compare_tails;
5427 dispatch[REG_ANY][MEOL] = compare_mismatch;
5428 dispatch[SANY][MEOL] = compare_mismatch;
5429 dispatch[ANYOF][MEOL] = compare_anyof_multiline; /* not in tests; remove? */
5430 dispatch[POSIXD][MEOL] = compare_mismatch;
5431 dispatch[POSIXU][MEOL] = compare_mismatch;
5432 dispatch[POSIXA][MEOL] = compare_mismatch;
5433 dispatch[NPOSIXD][MEOL] = compare_mismatch;
5434 dispatch[NPOSIXU][MEOL] = compare_mismatch;
5435 dispatch[NPOSIXA][MEOL] = compare_mismatch;
5436 dispatch[BRANCH][MEOL] = compare_left_branch;
5437 dispatch[EXACT][MEOL] = compare_exact_multiline;
5438 dispatch[EXACTF][MEOL] = compare_exact_multiline;
5439 dispatch[EXACTFU][MEOL] = compare_exact_multiline;
5440 dispatch[NOTHING][MEOL] = compare_left_tail;
5441 dispatch[TAIL][MEOL] = compare_left_tail;
5442 dispatch[STAR][MEOL] = compare_mismatch;
5443 dispatch[PLUS][MEOL] = compare_left_plus;
5444 dispatch[CURLY][MEOL] = compare_left_curly;
5445 dispatch[CURLYM][MEOL] = compare_left_curly;
5446 dispatch[CURLYX][MEOL] = compare_left_curly;
5447 dispatch[WHILEM][MEOL] = compare_left_tail;
5448 dispatch[OPEN][MEOL] = compare_left_open;
5449 dispatch[CLOSE][MEOL] = compare_left_tail;
5450 dispatch[IFMATCH][MEOL] = compare_after_assertion;
5451 dispatch[UNLESSM][MEOL] = compare_after_assertion;
5452 dispatch[MINMOD][MEOL] = compare_left_tail;
5453 dispatch[LNBREAK][MEOL] = compare_mismatch;
5454 dispatch[OPTIMIZED][MEOL] = compare_left_tail;
5455
5456 dispatch[SUCCEED][SEOL] = compare_left_tail;
5457 dispatch[EOS][SEOL] = compare_tails;
5458 dispatch[EOL][SEOL] = compare_tails;
5459 dispatch[SEOL][SEOL] = compare_tails;
5460 dispatch[BRANCH][SEOL] = compare_left_branch;
5461 dispatch[NOTHING][SEOL] = compare_left_tail;
5462 dispatch[POSIXD][SEOL] = compare_mismatch;
5463 dispatch[POSIXU][SEOL] = compare_mismatch;
5464 dispatch[POSIXA][SEOL] = compare_mismatch;
5465 dispatch[NPOSIXD][SEOL] = compare_mismatch;
5466 dispatch[NPOSIXU][SEOL] = compare_mismatch;
5467 dispatch[NPOSIXA][SEOL] = compare_mismatch;
5468 dispatch[TAIL][SEOL] = compare_left_tail;
5469 dispatch[STAR][SEOL] = 0;
5470 dispatch[PLUS][SEOL] = compare_left_plus;
5471 dispatch[CURLY][SEOL] = compare_left_curly;
5472 dispatch[CURLYM][SEOL] = compare_left_curly;
5473 dispatch[CURLYX][SEOL] = compare_left_curly;
5474 dispatch[WHILEM][SEOL] = compare_left_tail;
5475 dispatch[OPEN][SEOL] = compare_left_open;
5476 dispatch[CLOSE][SEOL] = compare_left_tail;
5477 dispatch[IFMATCH][SEOL] = compare_after_assertion;
5478 dispatch[UNLESSM][SEOL] = compare_after_assertion;
5479 dispatch[MINMOD][SEOL] = compare_left_tail;
5480 dispatch[LNBREAK][SEOL] = compare_mismatch;
5481 dispatch[OPTIMIZED][SEOL] = compare_left_tail;
5482
5483 dispatch[SUCCEED][BOUND] = compare_left_tail;
5484 dispatch[MBOL][BOUND] = compare_bol_word;
5485 dispatch[SBOL][BOUND] = compare_bol_word;
5486 dispatch[BOUND][BOUND] = compare_tails;
5487 dispatch[NBOUND][BOUND] = compare_mismatch;
5488 dispatch[REG_ANY][BOUND] = compare_mismatch;
5489 dispatch[SANY][BOUND] = compare_mismatch;
5490 dispatch[ANYOF][BOUND] = compare_anyof_bound;
5491 dispatch[ANYOFD][BOUND] = compare_anyof_bound;
5492 #ifdef RC_ANYOFR
5493 dispatch[ANYOFR][BOUND] = compare_anyofr_bound;
5494 #endif
5495 dispatch[ANYOFM][BOUND] = compare_anyofm_bound;
5496 dispatch[NANYOFM][BOUND] = compare_mismatch;
5497 dispatch[POSIXD][BOUND] = compare_posix_bound;
5498 dispatch[POSIXU][BOUND] = compare_posix_bound;
5499 dispatch[POSIXA][BOUND] = compare_posix_bound;
5500 dispatch[NPOSIXD][BOUND] = compare_negative_posix_word_bound;
5501 dispatch[NPOSIXU][BOUND] = compare_mismatch; /* should be replaced, needs extra test */
5502 dispatch[NPOSIXA][BOUND] = compare_negative_posix_word_bound;
5503 dispatch[BRANCH][BOUND] = compare_left_branch;
5504 dispatch[EXACT][BOUND] = compare_exact_bound;
5505 dispatch[EXACTF][BOUND] = compare_exact_bound;
5506 dispatch[EXACTFU][BOUND] = compare_exact_bound;
5507 dispatch[NOTHING][BOUND] = compare_left_tail;
5508 dispatch[TAIL][BOUND] = compare_left_tail;
5509 dispatch[CURLY][BOUND] = compare_left_curly;
5510 dispatch[CURLYM][BOUND] = compare_left_curly;
5511 dispatch[CURLYX][BOUND] = compare_left_curly;
5512 dispatch[WHILEM][BOUND] = compare_left_tail;
5513 dispatch[OPEN][BOUND] = compare_left_open;
5514 dispatch[CLOSE][BOUND] = compare_left_tail;
5515 dispatch[IFMATCH][BOUND] = compare_after_assertion;
5516 dispatch[UNLESSM][BOUND] = compare_after_assertion;
5517 dispatch[MINMOD][BOUND] = compare_left_tail;
5518 dispatch[LNBREAK][BOUND] = compare_mismatch;
5519 dispatch[OPTIMIZED][BOUND] = compare_left_tail;
5520
5521 dispatch[SUCCEED][NBOUND] = compare_left_tail;
5522 dispatch[MBOL][NBOUND] = compare_bol_nword;
5523 dispatch[SBOL][NBOUND] = compare_bol_nword;
5524 dispatch[BOUND][NBOUND] = compare_mismatch;
5525 dispatch[NBOUND][NBOUND] = compare_tails;
5526 dispatch[REG_ANY][NBOUND] = compare_mismatch;
5527 dispatch[SANY][NBOUND] = compare_mismatch;
5528 dispatch[ANYOF][NBOUND] = compare_anyof_nbound;
5529 dispatch[ANYOFD][NBOUND] = compare_anyof_nbound;
5530 #ifdef RC_ANYOFR
5531 dispatch[ANYOFR][NBOUND] = compare_anyofr_nbound;
5532 #endif
5533 dispatch[ANYOFM][NBOUND] = compare_anyofm_nbound;
5534 dispatch[NANYOFM][NBOUND] = compare_mismatch;
5535 dispatch[POSIXD][NBOUND] = compare_posix_nbound;
5536 dispatch[POSIXU][NBOUND] = compare_posix_nbound;
5537 dispatch[POSIXA][NBOUND] = compare_posix_nbound;
5538 dispatch[NPOSIXD][NBOUND] = compare_negative_posix_word_nbound;
5539 dispatch[NPOSIXU][NBOUND] = compare_negative_posix_word_nbound;
5540 dispatch[NPOSIXA][NBOUND] = compare_negative_posix_word_nbound;
5541 dispatch[BRANCH][NBOUND] = compare_left_branch;
5542 dispatch[EXACT][NBOUND] = compare_exact_nbound;
5543 dispatch[EXACTF][NBOUND] = compare_exact_nbound;
5544 dispatch[EXACTFU][NBOUND] = compare_exact_nbound;
5545 dispatch[NOTHING][NBOUND] = compare_left_tail;
5546 dispatch[TAIL][NBOUND] = compare_left_tail;
5547 dispatch[CURLY][NBOUND] = compare_left_curly;
5548 dispatch[CURLYM][NBOUND] = compare_left_curly;
5549 dispatch[CURLYX][NBOUND] = compare_left_curly;
5550 dispatch[WHILEM][NBOUND] = compare_left_tail;
5551 dispatch[OPEN][NBOUND] = compare_left_open;
5552 dispatch[CLOSE][NBOUND] = compare_left_tail;
5553 dispatch[IFMATCH][NBOUND] = compare_after_assertion;
5554 dispatch[UNLESSM][NBOUND] = compare_after_assertion;
5555 dispatch[MINMOD][NBOUND] = compare_left_tail;
5556 dispatch[LNBREAK][NBOUND] = compare_mismatch;
5557 dispatch[OPTIMIZED][NBOUND] = compare_left_tail;
5558
5559 dispatch[SUCCEED][REG_ANY] = compare_left_tail;
5560 dispatch[MBOL][REG_ANY] = compare_bol;
5561 dispatch[SBOL][REG_ANY] = compare_bol;
5562 dispatch[BOUND][REG_ANY] = compare_mismatch;
5563 dispatch[NBOUND][REG_ANY] = compare_mismatch;
5564 dispatch[REG_ANY][REG_ANY] = compare_tails;
5565 dispatch[SANY][REG_ANY] = compare_mismatch;
5566 dispatch[ANYOF][REG_ANY] = compare_anyof_reg_any;
5567 dispatch[ANYOFD][REG_ANY] = compare_anyof_reg_any;
5568 #ifdef RC_ANYOFR
5569 dispatch[ANYOFR][REG_ANY] = compare_anyofr_reg_any;
5570 #endif
5571 dispatch[ANYOFM][REG_ANY] = compare_anyofm_reg_any;
5572 dispatch[NANYOFM][REG_ANY] = compare_nanyofm_reg_any;
5573 dispatch[POSIXD][REG_ANY] = compare_posix_reg_any;
5574 dispatch[POSIXU][REG_ANY] = compare_posix_reg_any;
5575 dispatch[POSIXA][REG_ANY] = compare_posix_reg_any;
5576 dispatch[NPOSIXD][REG_ANY] = compare_negative_posix_reg_any;
5577 dispatch[NPOSIXU][REG_ANY] = compare_negative_posix_reg_any;
5578 dispatch[NPOSIXA][REG_ANY] = compare_negative_posix_reg_any;
5579 dispatch[BRANCH][REG_ANY] = compare_left_branch;
5580 dispatch[EXACT][REG_ANY] = compare_exact_reg_any;
5581 dispatch[EXACTF][REG_ANY] = compare_exact_reg_any;
5582 dispatch[EXACTFU][REG_ANY] = compare_exact_reg_any;
5583 dispatch[NOTHING][REG_ANY] = compare_left_tail;
5584 dispatch[TAIL][REG_ANY] = compare_left_tail;
5585 dispatch[STAR][REG_ANY] = compare_mismatch;
5586 dispatch[PLUS][REG_ANY] = compare_left_plus;
5587 dispatch[CURLY][REG_ANY] = compare_left_curly;
5588 dispatch[CURLYM][REG_ANY] = compare_left_curly;
5589 dispatch[CURLYX][REG_ANY] = compare_left_curly;
5590 dispatch[WHILEM][REG_ANY] = compare_left_tail;
5591 dispatch[OPEN][REG_ANY] = compare_left_open;
5592 dispatch[CLOSE][REG_ANY] = compare_left_tail;
5593 dispatch[IFMATCH][REG_ANY] = compare_after_assertion;
5594 dispatch[UNLESSM][REG_ANY] = compare_after_assertion;
5595 dispatch[MINMOD][REG_ANY] = compare_left_tail;
5596 dispatch[LNBREAK][REG_ANY] = compare_mismatch;
5597 dispatch[OPTIMIZED][REG_ANY] = compare_left_tail;
5598
5599 dispatch[SUCCEED][SANY] = compare_left_tail;
5600 dispatch[MBOL][SANY] = compare_bol;
5601 dispatch[SBOL][SANY] = compare_bol;
5602 dispatch[BOUND][SANY] = compare_mismatch;
5603 dispatch[NBOUND][SANY] = compare_mismatch;
5604 dispatch[REG_ANY][SANY] = compare_tails;
5605 dispatch[SANY][SANY] = compare_tails;
5606 dispatch[ANYOF][SANY] = compare_tails;
5607 dispatch[ANYOFD][SANY] = compare_tails;
5608 #ifdef RC_ANYOFR
5609 dispatch[ANYOFR][SANY] = compare_tails;
5610 #endif
5611 dispatch[ANYOFM][SANY] = compare_tails;
5612 dispatch[NANYOFM][SANY] = compare_tails;
5613 dispatch[POSIXD][SANY] = compare_tails;
5614 dispatch[POSIXU][SANY] = compare_tails;
5615 dispatch[POSIXA][SANY] = compare_tails;
5616 dispatch[NPOSIXD][SANY] = compare_tails;
5617 dispatch[NPOSIXU][SANY] = compare_tails;
5618 dispatch[NPOSIXA][SANY] = compare_tails;
5619 dispatch[BRANCH][SANY] = compare_left_branch;
5620 dispatch[EXACT][SANY] = compare_tails;
5621 dispatch[EXACTF][SANY] = compare_tails;
5622 dispatch[EXACTFU][SANY] = compare_tails;
5623 dispatch[NOTHING][SANY] = compare_left_tail;
5624 dispatch[TAIL][SANY] = compare_left_tail;
5625 dispatch[STAR][SANY] = compare_mismatch;
5626 dispatch[PLUS][SANY] = compare_left_plus;
5627 dispatch[CURLY][SANY] = compare_left_curly;
5628 dispatch[CURLYM][SANY] = compare_left_curly;
5629 dispatch[CURLYX][SANY] = compare_left_curly;
5630 dispatch[WHILEM][SANY] = compare_left_tail;
5631 dispatch[OPEN][SANY] = compare_left_open;
5632 dispatch[CLOSE][SANY] = compare_left_tail;
5633 dispatch[IFMATCH][SANY] = compare_after_assertion;
5634 dispatch[UNLESSM][SANY] = compare_after_assertion;
5635 dispatch[MINMOD][SANY] = compare_left_tail;
5636 dispatch[LNBREAK][SANY] = compare_mismatch;
5637 dispatch[OPTIMIZED][SANY] = compare_left_tail;
5638
5639 dispatch[SUCCEED][ANYOF] = compare_left_tail;
5640 dispatch[MBOL][ANYOF] = compare_bol;
5641 dispatch[SBOL][ANYOF] = compare_bol;
5642 dispatch[BOUND][ANYOF] = compare_mismatch;
5643 dispatch[NBOUND][ANYOF] = compare_mismatch;
5644 dispatch[REG_ANY][ANYOF] = compare_reg_any_anyof;
5645 dispatch[SANY][ANYOF] = compare_sany_anyof;
5646 dispatch[ANYOF][ANYOF] = compare_anyof_anyof;
5647 dispatch[ANYOFD][ANYOF] = compare_anyof_anyof;
5648 #ifdef RC_ANYOFR
5649 dispatch[ANYOFR][ANYOF] = compare_anyofr_anyof;
5650 #endif
5651 dispatch[ANYOFM][ANYOF] = compare_anyofm_anyof;
5652 dispatch[NANYOFM][ANYOF] = compare_mismatch;
5653 dispatch[POSIXD][ANYOF] = compare_posix_anyof;
5654 dispatch[POSIXU][ANYOF] = compare_posix_anyof;
5655 dispatch[POSIXA][ANYOF] = compare_posix_anyof;
5656 dispatch[NPOSIXD][ANYOF] = compare_negative_posix_anyof;
5657 dispatch[NPOSIXU][ANYOF] = compare_negative_posix_anyof;
5658 dispatch[NPOSIXA][ANYOF] = compare_negative_posix_anyof;
5659 dispatch[BRANCH][ANYOF] = compare_left_branch;
5660 dispatch[EXACT][ANYOF] = compare_exact_anyof;
5661 dispatch[EXACTF][ANYOF] = compare_exactf_anyof;
5662 dispatch[EXACTFU][ANYOF] = compare_exactf_anyof;
5663 dispatch[NOTHING][ANYOF] = compare_left_tail;
5664 dispatch[TAIL][ANYOF] = compare_left_tail;
5665 dispatch[STAR][ANYOF] = compare_mismatch;
5666 dispatch[PLUS][ANYOF] = compare_left_plus;
5667 dispatch[CURLY][ANYOF] = compare_left_curly;
5668 dispatch[CURLYM][ANYOF] = compare_left_curly;
5669 dispatch[CURLYX][ANYOF] = compare_left_curly;
5670 dispatch[WHILEM][ANYOF] = compare_left_tail;
5671 dispatch[OPEN][ANYOF] = compare_left_open;
5672 dispatch[CLOSE][ANYOF] = compare_left_tail;
5673 dispatch[IFMATCH][ANYOF] = compare_after_assertion;
5674 dispatch[UNLESSM][ANYOF] = compare_after_assertion;
5675 dispatch[MINMOD][ANYOF] = compare_left_tail;
5676 dispatch[LNBREAK][ANYOF] = compare_mismatch;
5677 dispatch[OPTIMIZED][ANYOF] = compare_left_tail;
5678
5679 dispatch[SUCCEED][ANYOFD] = compare_left_tail;
5680 dispatch[MBOL][ANYOFD] = compare_bol;
5681 dispatch[SBOL][ANYOFD] = compare_bol;
5682 dispatch[BOUND][ANYOFD] = compare_mismatch;
5683 dispatch[NBOUND][ANYOFD] = compare_mismatch;
5684 dispatch[REG_ANY][ANYOFD] = compare_reg_any_anyof;
5685 dispatch[SANY][ANYOFD] = compare_sany_anyof;
5686 dispatch[ANYOF][ANYOFD] = compare_anyof_anyof;
5687 dispatch[ANYOFD][ANYOFD] = compare_anyof_anyof;
5688 #ifdef RC_ANYOFR
5689 dispatch[ANYOFR][ANYOFD] = compare_anyofr_anyof;
5690 #endif
5691 dispatch[ANYOFM][ANYOFD] = compare_anyofm_anyof;
5692 dispatch[NANYOFM][ANYOFD] = compare_mismatch;
5693 dispatch[POSIXD][ANYOFD] = compare_posix_anyof;
5694 dispatch[POSIXU][ANYOFD] = compare_posix_anyof;
5695 dispatch[POSIXA][ANYOFD] = compare_posix_anyof;
5696 dispatch[NPOSIXD][ANYOFD] = compare_negative_posix_anyof;
5697 dispatch[NPOSIXU][ANYOFD] = compare_negative_posix_anyof;
5698 dispatch[NPOSIXA][ANYOFD] = compare_negative_posix_anyof;
5699 dispatch[BRANCH][ANYOFD] = compare_left_branch;
5700 dispatch[EXACT][ANYOFD] = compare_exact_anyof;
5701 dispatch[EXACTFU][ANYOFD] = compare_exactf_anyof;
5702 dispatch[NOTHING][ANYOFD] = compare_left_tail;
5703 dispatch[TAIL][ANYOFD] = compare_left_tail;
5704 dispatch[STAR][ANYOFD] = compare_mismatch;
5705 dispatch[PLUS][ANYOFD] = compare_left_plus;
5706 dispatch[CURLY][ANYOFD] = compare_left_curly;
5707 dispatch[CURLYM][ANYOFD] = compare_left_curly;
5708 dispatch[CURLYX][ANYOFD] = compare_left_curly;
5709 dispatch[OPEN][ANYOFD] = compare_left_open;
5710 dispatch[CLOSE][ANYOFD] = compare_left_tail;
5711 dispatch[IFMATCH][ANYOFD] = compare_after_assertion;
5712 dispatch[UNLESSM][ANYOFD] = compare_after_assertion;
5713 dispatch[MINMOD][ANYOFD] = compare_left_tail;
5714 dispatch[LNBREAK][ANYOFD] = compare_mismatch;
5715 dispatch[OPTIMIZED][ANYOFD] = compare_left_tail;
5716
5717 #ifdef RC_ANYOFR
5718 dispatch[SUCCEED][ANYOFR] = compare_left_tail;
5719 dispatch[MBOL][ANYOFR] = compare_bol;
5720 dispatch[SBOL][ANYOFR] = compare_bol;
5721 dispatch[BOUND][ANYOFR] = compare_mismatch;
5722 dispatch[NBOUND][ANYOFR] = compare_mismatch;
5723 dispatch[REG_ANY][ANYOFR] = compare_mismatch;
5724 dispatch[SANY][ANYOFR] = compare_mismatch;
5725 dispatch[ANYOF][ANYOFR] = compare_anyof_anyofr;
5726 dispatch[ANYOFR][ANYOFR] = compare_anyofr_anyofr;
5727 dispatch[ANYOFM][ANYOFR] = compare_anyofm_anyofr;
5728 dispatch[NANYOFM][ANYOFR] = compare_mismatch;
5729 dispatch[POSIXD][ANYOFR] = compare_mismatch;
5730 dispatch[POSIXU][ANYOFR] = compare_mismatch;
5731 dispatch[POSIXA][ANYOFR] = compare_mismatch;
5732 dispatch[NPOSIXD][ANYOFR] = compare_mismatch;
5733 dispatch[NPOSIXU][ANYOFR] = compare_mismatch;
5734 dispatch[NPOSIXA][ANYOFR] = compare_mismatch;
5735 dispatch[BRANCH][ANYOFR] = compare_left_branch;
5736 dispatch[EXACT][ANYOFR] = compare_exact_anyofr;
5737 dispatch[EXACTF][ANYOFR] = compare_exactf_anyofr;
5738 dispatch[EXACTFU][ANYOFR] = compare_exactf_anyofr;
5739 dispatch[NOTHING][ANYOFR] = compare_left_tail;
5740 dispatch[TAIL][ANYOFR] = compare_left_tail;
5741 dispatch[STAR][ANYOFR] = compare_mismatch;
5742 dispatch[PLUS][ANYOFR] = compare_left_plus;
5743 dispatch[CURLY][ANYOFR] = compare_left_curly;
5744 dispatch[CURLYM][ANYOFR] = compare_left_curly;
5745 dispatch[CURLYX][ANYOFR] = compare_left_curly;
5746 dispatch[OPEN][ANYOFR] = compare_left_open;
5747 dispatch[CLOSE][ANYOFR] = compare_left_tail;
5748 dispatch[IFMATCH][ANYOFR] = compare_after_assertion;
5749 dispatch[UNLESSM][ANYOFR] = compare_after_assertion;
5750 dispatch[MINMOD][ANYOFR] = compare_left_tail;
5751 dispatch[LNBREAK][ANYOFR] = compare_mismatch;
5752 dispatch[OPTIMIZED][ANYOFR] = compare_left_tail;
5753 #endif
5754
5755 dispatch[SUCCEED][ANYOFM] = compare_left_tail;
5756 dispatch[MBOL][ANYOFM] = compare_bol;
5757 dispatch[SBOL][ANYOFM] = compare_bol;
5758 dispatch[BOUND][ANYOFM] = compare_mismatch;
5759 dispatch[NBOUND][ANYOFM] = compare_mismatch;
5760 dispatch[REG_ANY][ANYOFM] = compare_mismatch;
5761 dispatch[SANY][ANYOFM] = compare_mismatch;
5762 dispatch[ANYOF][ANYOFM] = compare_anyof_anyofm;
5763 dispatch[ANYOFD][ANYOFM] = compare_anyof_anyofm;
5764 #ifdef RC_ANYOFR
5765 dispatch[ANYOFR][ANYOFM] = compare_anyofr_anyofm;
5766 #endif
5767 dispatch[ANYOFM][ANYOFM] = compare_anyofm_anyofm;
5768 dispatch[NANYOFM][ANYOFM] = compare_mismatch;
5769 dispatch[POSIXD][ANYOFM] = compare_mismatch;
5770 dispatch[POSIXU][ANYOFM] = compare_mismatch;
5771 dispatch[POSIXA][ANYOFM] = compare_mismatch;
5772 dispatch[NPOSIXD][ANYOFM] = compare_mismatch;
5773 dispatch[NPOSIXU][ANYOFM] = compare_mismatch;
5774 dispatch[NPOSIXA][ANYOFM] = compare_mismatch;
5775 dispatch[BRANCH][ANYOFM] = compare_left_branch;
5776 dispatch[EXACT][ANYOFM] = compare_exact_anyofm;
5777 dispatch[EXACTF][ANYOFM] = compare_exactf_anyofm;
5778 dispatch[EXACTFU][ANYOFM] = compare_exactf_anyofm;
5779 dispatch[NOTHING][ANYOFM] = compare_left_tail;
5780 dispatch[TAIL][ANYOFM] = compare_left_tail;
5781 dispatch[STAR][ANYOFM] = compare_mismatch;
5782 dispatch[PLUS][ANYOFM] = compare_left_plus;
5783 dispatch[CURLY][ANYOFM] = compare_left_curly;
5784 dispatch[CURLYM][ANYOFM] = compare_left_curly;
5785 dispatch[CURLYX][ANYOFM] = compare_left_curly;
5786 dispatch[OPEN][ANYOFM] = compare_left_open;
5787 dispatch[CLOSE][ANYOFM] = compare_left_tail;
5788 dispatch[IFMATCH][ANYOFM] = compare_after_assertion;
5789 dispatch[UNLESSM][ANYOFM] = compare_after_assertion;
5790 dispatch[MINMOD][ANYOFM] = compare_left_tail;
5791 dispatch[LNBREAK][ANYOFM] = compare_mismatch;
5792 dispatch[OPTIMIZED][ANYOFM] = compare_left_tail;
5793
5794 dispatch[SUCCEED][NANYOFM] = compare_left_tail;
5795 dispatch[MBOL][NANYOFM] = compare_bol;
5796 dispatch[SBOL][NANYOFM] = compare_bol;
5797 dispatch[BOUND][NANYOFM] = compare_mismatch;
5798 dispatch[NBOUND][NANYOFM] = compare_mismatch;
5799 dispatch[REG_ANY][NANYOFM] = compare_mismatch;
5800 dispatch[SANY][NANYOFM] = compare_mismatch;
5801 dispatch[ANYOF][NANYOFM] = compare_anyof_nanyofm;
5802 dispatch[ANYOFD][NANYOFM] = compare_anyof_nanyofm;
5803 #ifdef RC_ANYOFR
5804 dispatch[ANYOFR][NANYOFM] = compare_anyofr_nanyofm;
5805 #endif
5806 dispatch[ANYOFM][NANYOFM] = compare_anyofm_nanyofm;
5807 dispatch[NANYOFM][NANYOFM] = compare_nanyofm_nanyofm;
5808 dispatch[POSIXD][NANYOFM] = compare_posix_nanyofm;
5809 dispatch[POSIXU][NANYOFM] = compare_posix_nanyofm;
5810 dispatch[POSIXA][NANYOFM] = compare_posix_nanyofm;
5811 dispatch[NPOSIXD][NANYOFM] = compare_negative_posix_nanyofm;
5812 dispatch[NPOSIXU][NANYOFM] = compare_negative_posix_nanyofm;
5813 dispatch[NPOSIXA][NANYOFM] = compare_negative_posix_nanyofm;
5814 dispatch[BRANCH][NANYOFM] = compare_left_branch;
5815 dispatch[EXACT][NANYOFM] = compare_exact_nanyofm;
5816 dispatch[EXACTF][NANYOFM] = compare_exactf_nanyofm;
5817 dispatch[EXACTFU][NANYOFM] = compare_exactf_nanyofm;
5818 dispatch[NOTHING][NANYOFM] = compare_left_tail;
5819 dispatch[TAIL][NANYOFM] = compare_left_tail;
5820 dispatch[STAR][NANYOFM] = compare_mismatch;
5821 dispatch[PLUS][NANYOFM] = compare_left_plus;
5822 dispatch[CURLY][NANYOFM] = compare_left_curly;
5823 dispatch[CURLYM][NANYOFM] = compare_left_curly;
5824 dispatch[CURLYX][NANYOFM] = compare_left_curly;
5825 dispatch[OPEN][NANYOFM] = compare_left_open;
5826 dispatch[CLOSE][NANYOFM] = compare_left_tail;
5827 dispatch[IFMATCH][NANYOFM] = compare_after_assertion;
5828 dispatch[UNLESSM][NANYOFM] = compare_after_assertion;
5829 dispatch[MINMOD][NANYOFM] = compare_left_tail;
5830 dispatch[LNBREAK][NANYOFM] = compare_mismatch;
5831 dispatch[OPTIMIZED][NANYOFM] = compare_left_tail;
5832
5833 dispatch[SUCCEED][POSIXD] = compare_left_tail;
5834 dispatch[MBOL][POSIXD] = compare_bol;
5835 dispatch[SBOL][POSIXD] = compare_bol;
5836 dispatch[BOUND][POSIXD] = compare_mismatch;
5837 dispatch[NBOUND][POSIXD] = compare_mismatch;
5838 dispatch[REG_ANY][POSIXD] = compare_mismatch;
5839 dispatch[SANY][POSIXD] = compare_mismatch;
5840 dispatch[ANYOF][POSIXD] = compare_anyof_posix;
5841 dispatch[ANYOFD][POSIXD] = compare_anyof_posix;
5842 #ifdef RC_ANYOFR
5843 dispatch[ANYOFR][POSIXD] = compare_anyofr_posix;
5844 #endif
5845 dispatch[ANYOFM][POSIXD] = compare_anyofm_posix;
5846 dispatch[NANYOFM][POSIXD] = compare_nanyofm_posix;
5847 dispatch[POSIXD][POSIXD] = compare_posix_posix;
5848 dispatch[POSIXU][POSIXD] = compare_posix_posix;
5849 dispatch[POSIXA][POSIXD] = compare_posix_posix;
5850 dispatch[NPOSIXD][POSIXD] = compare_mismatch;
5851 dispatch[NPOSIXU][POSIXD] = compare_mismatch;
5852 dispatch[NPOSIXA][POSIXD] = compare_mismatch;
5853 dispatch[BRANCH][POSIXD] = compare_left_branch;
5854 dispatch[EXACT][POSIXD] = compare_exact_posix;
5855 dispatch[EXACTF][POSIXD] = compare_exactf_posix;
5856 dispatch[EXACTFU][POSIXD] = compare_exactf_posix;
5857 dispatch[NOTHING][POSIXD] = compare_left_tail;
5858 dispatch[STAR][POSIXD] = compare_mismatch;
5859 dispatch[PLUS][POSIXD] = compare_left_plus;
5860 dispatch[CURLY][POSIXD] = compare_left_curly;
5861 dispatch[CURLYM][POSIXD] = compare_left_curly;
5862 dispatch[CURLYX][POSIXD] = compare_left_curly;
5863 dispatch[OPEN][POSIXD] = compare_left_open;
5864 dispatch[CLOSE][POSIXD] = compare_left_tail;
5865 dispatch[IFMATCH][POSIXD] = compare_after_assertion;
5866 dispatch[UNLESSM][POSIXD] = compare_after_assertion;
5867 dispatch[MINMOD][POSIXD] = compare_left_tail;
5868 dispatch[LNBREAK][POSIXD] = compare_mismatch;
5869 dispatch[OPTIMIZED][POSIXD] = compare_left_tail;
5870
5871 dispatch[SUCCEED][POSIXU] = compare_left_tail;
5872 dispatch[MBOL][POSIXU] = compare_bol;
5873 dispatch[SBOL][POSIXU] = compare_bol;
5874 dispatch[BOUND][POSIXU] = compare_mismatch;
5875 dispatch[NBOUND][POSIXU] = compare_mismatch;
5876 dispatch[REG_ANY][POSIXU] = compare_mismatch;
5877 dispatch[SANY][POSIXU] = compare_mismatch;
5878 dispatch[ANYOF][POSIXU] = compare_anyof_posix;
5879 dispatch[ANYOFD][POSIXU] = compare_anyof_posix;
5880 #ifdef RC_ANYOFR
5881 dispatch[ANYOFR][POSIXU] = compare_anyofr_posix;
5882 #endif
5883 dispatch[ANYOFM][POSIXU] = compare_anyofm_posix;
5884 dispatch[NANYOFM][POSIXU] = compare_nanyofm_posix;
5885 dispatch[POSIXD][POSIXU] = compare_posix_posix;
5886 dispatch[POSIXA][POSIXU] = compare_posix_posix;
5887 dispatch[POSIXU][POSIXU] = compare_posix_posix;
5888 dispatch[NPOSIXD][POSIXU] = compare_mismatch;
5889 dispatch[NPOSIXU][POSIXU] = compare_mismatch;
5890 dispatch[NPOSIXA][POSIXU] = compare_mismatch;
5891 dispatch[BRANCH][POSIXU] = compare_left_branch;
5892 dispatch[EXACT][POSIXU] = compare_exact_posix;
5893 dispatch[EXACTF][POSIXU] = compare_exact_posix;
5894 dispatch[EXACTFU][POSIXU] = compare_exact_posix;
5895 dispatch[NOTHING][POSIXU] = compare_left_tail;
5896 dispatch[TAIL][POSIXU] = compare_left_tail;
5897 dispatch[STAR][POSIXU] = compare_mismatch;
5898 dispatch[PLUS][POSIXU] = compare_left_plus;
5899 dispatch[CURLY][POSIXU] = compare_left_curly;
5900 dispatch[CURLYM][POSIXU] = compare_left_curly;
5901 dispatch[CURLYX][POSIXU] = compare_left_curly;
5902 dispatch[OPEN][POSIXU] = compare_left_open;
5903 dispatch[CLOSE][POSIXU] = compare_left_tail;
5904 dispatch[IFMATCH][POSIXU] = compare_after_assertion;
5905 dispatch[UNLESSM][POSIXU] = compare_after_assertion;
5906 dispatch[MINMOD][POSIXU] = compare_left_tail;
5907 dispatch[LNBREAK][POSIXU] = compare_mismatch;
5908 dispatch[OPTIMIZED][POSIXU] = compare_left_tail;
5909
5910 dispatch[SUCCEED][POSIXA] = compare_left_tail;
5911 dispatch[MBOL][POSIXA] = compare_bol;
5912 dispatch[SBOL][POSIXA] = compare_bol;
5913 dispatch[BOUND][POSIXA] = compare_mismatch;
5914 dispatch[NBOUND][POSIXA] = compare_mismatch;
5915 dispatch[REG_ANY][POSIXA] = compare_mismatch;
5916 dispatch[SANY][POSIXA] = compare_mismatch;
5917 dispatch[ANYOF][POSIXA] = compare_anyof_posixa;
5918 dispatch[ANYOFD][POSIXA] = compare_anyof_posixa;
5919 #ifdef RC_ANYOFR
5920 dispatch[ANYOFR][POSIXA] = compare_anyofr_posix;
5921 #endif
5922 dispatch[ANYOFM][POSIXA] = compare_anyofm_posix;
5923 dispatch[NANYOFM][POSIXA] = compare_nanyofm_posix;
5924 dispatch[POSIXD][POSIXA] = compare_mismatch;
5925 dispatch[POSIXU][POSIXA] = compare_mismatch;
5926 dispatch[POSIXA][POSIXA] = compare_posix_posix;
5927 dispatch[NPOSIXD][POSIXA] = compare_mismatch;
5928 dispatch[NPOSIXU][POSIXA] = compare_mismatch;
5929 dispatch[NPOSIXA][POSIXA] = compare_mismatch;
5930 dispatch[BRANCH][POSIXA] = compare_left_branch;
5931 dispatch[EXACT][POSIXA] = compare_exact_posix;
5932 dispatch[EXACTF][POSIXA] = compare_exact_posix;
5933 dispatch[EXACTFU][POSIXA] = compare_exact_posix;
5934 dispatch[NOTHING][POSIXA] = compare_left_tail;
5935 dispatch[STAR][POSIXA] = compare_mismatch;
5936 dispatch[PLUS][POSIXA] = compare_left_plus;
5937 dispatch[CURLY][POSIXA] = compare_left_curly;
5938 dispatch[CURLYM][POSIXA] = compare_left_curly;
5939 dispatch[CURLYX][POSIXA] = compare_left_curly;
5940 dispatch[OPEN][POSIXA] = compare_left_open;
5941 dispatch[CLOSE][POSIXA] = compare_left_tail;
5942 dispatch[IFMATCH][POSIXA] = compare_after_assertion;
5943 dispatch[UNLESSM][POSIXA] = compare_after_assertion;
5944 dispatch[MINMOD][POSIXA] = compare_left_tail;
5945 dispatch[LNBREAK][POSIXA] = compare_mismatch;
5946 dispatch[OPTIMIZED][POSIXA] = compare_left_tail;
5947
5948 dispatch[SUCCEED][NPOSIXD] = compare_left_tail;
5949 dispatch[MBOL][NPOSIXD] = compare_bol;
5950 dispatch[SBOL][NPOSIXD] = compare_bol;
5951 dispatch[BOUND][NPOSIXD] = compare_mismatch;
5952 dispatch[NBOUND][NPOSIXD] = compare_mismatch;
5953 dispatch[REG_ANY][NPOSIXD] = compare_mismatch;
5954 dispatch[SANY][NPOSIXD] = compare_mismatch;
5955 dispatch[ANYOF][NPOSIXD] = compare_anyof_negative_posix;
5956 dispatch[ANYOFD][NPOSIXD] = compare_anyof_negative_posix;
5957 #ifdef RC_ANYOFR
5958 dispatch[ANYOFR][NPOSIXD] = compare_anyofr_negative_posix;
5959 #endif
5960 dispatch[ANYOFM][NPOSIXD] = compare_anyofm_negative_posix;
5961 dispatch[NANYOFM][NPOSIXD] = compare_nanyofm_negative_posix;
5962 dispatch[POSIXD][NPOSIXD] = compare_posix_negative_posix;
5963 dispatch[POSIXU][NPOSIXD] = compare_posix_negative_posix;
5964 dispatch[POSIXA][NPOSIXD] = compare_posix_negative_posix;
5965 dispatch[NPOSIXD][NPOSIXD] = compare_negative_posix_negative_posix;
5966 dispatch[NPOSIXU][NPOSIXD] = compare_negative_posix_negative_posix;
5967 dispatch[NPOSIXA][NPOSIXD] = compare_mismatch;
5968 dispatch[BRANCH][NPOSIXD] = compare_left_branch;
5969 dispatch[EXACT][NPOSIXD] = compare_exact_negative_posix;
5970 dispatch[EXACTF][NPOSIXD] = compare_exactf_negative_posix;
5971 dispatch[EXACTFU][NPOSIXD] = compare_exactf_negative_posix;
5972 dispatch[NOTHING][NPOSIXD] = compare_left_tail;
5973 dispatch[STAR][NPOSIXD] = compare_mismatch;
5974 dispatch[PLUS][NPOSIXD] = compare_left_plus;
5975 dispatch[CURLY][NPOSIXD] = compare_left_curly;
5976 dispatch[CURLYM][NPOSIXD] = compare_left_curly;
5977 dispatch[CURLYX][NPOSIXD] = compare_left_curly;
5978 dispatch[OPEN][NPOSIXD] = compare_left_open;
5979 dispatch[CLOSE][NPOSIXD] = compare_left_tail;
5980 dispatch[IFMATCH][NPOSIXD] = compare_after_assertion;
5981 dispatch[UNLESSM][NPOSIXD] = compare_after_assertion;
5982 dispatch[MINMOD][NPOSIXD] = compare_left_tail;
5983 dispatch[LNBREAK][NPOSIXD] = compare_mismatch;
5984 dispatch[OPTIMIZED][NPOSIXD] = compare_left_tail;
5985
5986 dispatch[SUCCEED][NPOSIXU] = compare_left_tail;
5987 dispatch[MBOL][NPOSIXU] = compare_bol;
5988 dispatch[SBOL][NPOSIXU] = compare_bol;
5989 dispatch[BOUND][NPOSIXU] = compare_mismatch;
5990 dispatch[NBOUND][NPOSIXU] = compare_mismatch;
5991 dispatch[REG_ANY][NPOSIXU] = compare_mismatch;
5992 dispatch[SANY][NPOSIXU] = compare_mismatch;
5993 dispatch[ANYOF][NPOSIXU] = compare_anyof_negative_posix;
5994 dispatch[ANYOFD][NPOSIXU] = compare_anyof_negative_posix;
5995 #ifdef RC_ANYOFR
5996 dispatch[ANYOFR][NPOSIXU] = compare_anyofr_negative_posix;
5997 #endif
5998 dispatch[ANYOFM][NPOSIXU] = compare_anyofm_negative_posix;
5999 dispatch[NANYOFM][NPOSIXU] = compare_nanyofm_negative_posix;
6000 dispatch[POSIXD][NPOSIXU] = compare_posix_negative_posix;
6001 dispatch[POSIXU][NPOSIXU] = compare_posix_negative_posix;
6002 dispatch[POSIXA][NPOSIXU] = compare_posix_negative_posix;
6003 dispatch[NPOSIXD][NPOSIXU] = compare_negative_posix_negative_posix;
6004 dispatch[NPOSIXU][NPOSIXU] = compare_negative_posix_negative_posix;
6005 dispatch[NPOSIXA][NPOSIXU] = compare_mismatch;
6006 dispatch[BRANCH][NPOSIXU] = compare_left_branch;
6007 dispatch[EXACT][NPOSIXU] = compare_exact_negative_posix;
6008 dispatch[EXACTF][NPOSIXU] = compare_exactf_negative_posix;
6009 dispatch[EXACTFU][NPOSIXU] = compare_exactf_negative_posix;
6010 dispatch[NOTHING][NPOSIXU] = compare_left_tail;
6011 dispatch[STAR][NPOSIXU] = compare_mismatch;
6012 dispatch[PLUS][NPOSIXU] = compare_left_plus;
6013 dispatch[CURLY][NPOSIXU] = compare_left_curly;
6014 dispatch[CURLYM][NPOSIXU] = compare_left_curly;
6015 dispatch[CURLYX][NPOSIXU] = compare_left_curly;
6016 dispatch[OPEN][NPOSIXU] = compare_left_open;
6017 dispatch[CLOSE][NPOSIXU] = compare_left_tail;
6018 dispatch[IFMATCH][NPOSIXU] = compare_after_assertion;
6019 dispatch[UNLESSM][NPOSIXU] = compare_after_assertion;
6020 dispatch[MINMOD][NPOSIXU] = compare_left_tail;
6021 dispatch[LNBREAK][NPOSIXU] = compare_mismatch;
6022 dispatch[OPTIMIZED][NPOSIXU] = compare_left_tail;
6023
6024 dispatch[SUCCEED][NPOSIXA] = compare_left_tail;
6025 dispatch[MBOL][NPOSIXA] = compare_bol;
6026 dispatch[SBOL][NPOSIXA] = compare_bol;
6027 dispatch[BOUND][NPOSIXA] = compare_mismatch;
6028 dispatch[NBOUND][NPOSIXA] = compare_mismatch;
6029 dispatch[REG_ANY][NPOSIXA] = compare_mismatch;
6030 dispatch[SANY][NPOSIXA] = compare_mismatch;
6031 dispatch[ANYOF][NPOSIXA] = compare_anyof_negative_posix;
6032 dispatch[ANYOFD][NPOSIXA] = compare_anyof_negative_posix;
6033 #ifdef RC_ANYOFR
6034 dispatch[ANYOFR][NPOSIXA] = compare_anyofr_negative_posix;
6035 #endif
6036 dispatch[ANYOFM][NPOSIXA] = compare_anyofm_negative_posix;
6037 dispatch[NANYOFM][NPOSIXA] = compare_nanyofm_negative_posix;
6038 dispatch[POSIXD][NPOSIXA] = compare_posix_negative_posix;
6039 dispatch[POSIXU][NPOSIXA] = compare_posix_negative_posix;
6040 dispatch[POSIXA][NPOSIXA] = compare_posix_negative_posix;
6041 dispatch[NPOSIXD][NPOSIXA] = compare_negative_posix_negative_posix;
6042 dispatch[NPOSIXU][NPOSIXA] = compare_negative_posix_negative_posix;
6043 dispatch[NPOSIXA][NPOSIXA] = compare_negative_posix_negative_posix;
6044 dispatch[BRANCH][NPOSIXA] = compare_left_branch;
6045 dispatch[EXACT][NPOSIXA] = compare_exact_negative_posix;
6046 dispatch[EXACTF][NPOSIXA] = compare_exactf_negative_posix;
6047 dispatch[EXACTFU][NPOSIXA] = compare_exactf_negative_posix;
6048 dispatch[NOTHING][NPOSIXA] = compare_left_tail;
6049 dispatch[STAR][NPOSIXA] = compare_mismatch;
6050 dispatch[PLUS][NPOSIXA] = compare_left_plus;
6051 dispatch[CURLY][NPOSIXA] = compare_left_curly;
6052 dispatch[CURLYM][NPOSIXA] = compare_left_curly;
6053 dispatch[CURLYX][NPOSIXA] = compare_left_curly;
6054 dispatch[OPEN][NPOSIXA] = compare_left_open;
6055 dispatch[CLOSE][NPOSIXA] = compare_left_tail;
6056 dispatch[IFMATCH][NPOSIXA] = compare_after_assertion;
6057 dispatch[UNLESSM][NPOSIXA] = compare_after_assertion;
6058 dispatch[MINMOD][NPOSIXA] = compare_left_tail;
6059 dispatch[LNBREAK][NPOSIXA] = compare_mismatch;
6060 dispatch[OPTIMIZED][NPOSIXA] = compare_left_tail;
6061
6062 for (i = 0; i < REGNODE_MAX; ++i)
6063 {
6064 dispatch[i][BRANCH] = compare_right_branch;
6065 }
6066
6067 dispatch[SUCCEED][BRANCH] = compare_left_tail;
6068 dispatch[ANYOF][BRANCH] = compare_anyof_branch;
6069 dispatch[ANYOFD][BRANCH] = compare_anyof_branch;
6070 #ifdef RC_ANYOFR
6071 dispatch[ANYOFR][BRANCH] = compare_anyofr_branch;
6072 #endif
6073 dispatch[ANYOFM][BRANCH] = compare_anyofm_branch;
6074 dispatch[BRANCH][BRANCH] = compare_left_branch;
6075 dispatch[NOTHING][BRANCH] = compare_left_tail;
6076 dispatch[TAIL][BRANCH] = compare_left_tail;
6077 dispatch[WHILEM][BRANCH] = compare_left_tail;
6078 dispatch[OPEN][BRANCH] = compare_left_open;
6079 dispatch[CLOSE][BRANCH] = compare_left_tail;
6080 dispatch[IFMATCH][BRANCH] = compare_after_assertion;
6081 dispatch[UNLESSM][BRANCH] = compare_after_assertion;
6082 dispatch[MINMOD][BRANCH] = compare_left_tail;
6083 dispatch[OPTIMIZED][BRANCH] = compare_left_tail;
6084
6085 dispatch[SUCCEED][EXACT] = compare_left_tail;
6086 dispatch[MBOL][EXACT] = compare_bol;
6087 dispatch[SBOL][EXACT] = compare_bol;
6088 dispatch[BOUND][EXACT] = compare_mismatch;
6089 dispatch[NBOUND][EXACT] = compare_mismatch;
6090 dispatch[REG_ANY][EXACT] = compare_mismatch;
6091 dispatch[SANY][EXACT] = compare_mismatch;
6092 dispatch[ANYOF][EXACT] = compare_anyof_exact;
6093 dispatch[ANYOFD][EXACT] = compare_anyof_exact;
6094 #ifdef RC_ANYOFR
6095 dispatch[ANYOFR][EXACT] = compare_anyofr_exact;
6096 #endif
6097 dispatch[ANYOFM][EXACT] = compare_anyofm_exact;
6098 dispatch[NANYOFM][EXACT] = compare_mismatch;
6099 dispatch[POSIXD][EXACT] = compare_mismatch;
6100 dispatch[POSIXU][EXACT] = compare_mismatch;
6101 dispatch[POSIXA][EXACT] = compare_mismatch;
6102 dispatch[NPOSIXD][EXACT] = compare_mismatch;
6103 dispatch[NPOSIXU][EXACT] = compare_mismatch;
6104 dispatch[NPOSIXA][EXACT] = compare_mismatch;
6105 dispatch[BRANCH][EXACT] = compare_left_branch;
6106 dispatch[EXACT][EXACT] = compare_exact_exact;
6107 dispatch[EXACTF][EXACT] = compare_exactf_exact;
6108 dispatch[EXACTFU][EXACT] = compare_exactf_exact;
6109 dispatch[NOTHING][EXACT] = compare_left_tail;
6110 dispatch[TAIL][EXACT] = compare_left_tail;
6111 dispatch[STAR][EXACT] = compare_mismatch;
6112 dispatch[PLUS][EXACT] = compare_left_plus;
6113 dispatch[CURLY][EXACT] = compare_left_curly;
6114 dispatch[CURLYM][EXACT] = compare_left_curly;
6115 dispatch[CURLYX][EXACT] = compare_left_curly;
6116 dispatch[WHILEM][EXACT] = compare_left_tail;
6117 dispatch[OPEN][EXACT] = compare_left_open;
6118 dispatch[CLOSE][EXACT] = compare_left_tail;
6119 dispatch[IFMATCH][EXACT] = compare_after_assertion;
6120 dispatch[UNLESSM][EXACT] = compare_after_assertion;
6121 dispatch[MINMOD][EXACT] = compare_left_tail;
6122 dispatch[LNBREAK][EXACT] = compare_mismatch;
6123 dispatch[OPTIMIZED][EXACT] = compare_left_tail;
6124
6125 dispatch[SUCCEED][EXACTF] = compare_left_tail;
6126 dispatch[MBOL][EXACTF] = compare_bol;
6127 dispatch[SBOL][EXACTF] = compare_bol;
6128 dispatch[BOUND][EXACTF] = compare_mismatch;
6129 dispatch[NBOUND][EXACTF] = compare_mismatch;
6130 dispatch[REG_ANY][EXACTF] = compare_mismatch;
6131 dispatch[SANY][EXACTF] = compare_mismatch;
6132 dispatch[ANYOF][EXACTF] = compare_anyof_exactf;
6133 dispatch[ANYOFD][EXACTF] = compare_anyof_exactf;
6134 #ifdef RC_ANYOFR
6135 dispatch[ANYOFR][EXACTF] = compare_anyofr_exactf;
6136 #endif
6137 dispatch[ANYOFM][EXACTF] = compare_anyofm_exactf;
6138 dispatch[NANYOFM][EXACTF] = compare_mismatch;
6139 dispatch[POSIXD][EXACTF] = compare_mismatch;
6140 dispatch[POSIXU][EXACTF] = compare_mismatch;
6141 dispatch[POSIXA][EXACTF] = compare_mismatch;
6142 dispatch[NPOSIXD][EXACTF] = compare_mismatch;
6143 dispatch[NPOSIXU][EXACTF] = compare_mismatch;
6144 dispatch[NPOSIXA][EXACTF] = compare_mismatch;
6145 dispatch[BRANCH][EXACTF] = compare_left_branch;
6146 dispatch[EXACT][EXACTF] = compare_exact_exactf;
6147 dispatch[EXACTF][EXACTF] = compare_exactf_exactf;
6148 dispatch[NOTHING][EXACTF] = compare_left_tail;
6149 dispatch[TAIL][EXACTF] = compare_left_tail;
6150 dispatch[STAR][EXACTF] = compare_mismatch;
6151 dispatch[PLUS][EXACTF] = compare_left_plus;
6152 dispatch[CURLY][EXACTF] = compare_left_curly;
6153 dispatch[CURLYM][EXACTF] = compare_left_curly;
6154 dispatch[CURLYX][EXACTF] = compare_left_curly;
6155 dispatch[WHILEM][EXACTF] = compare_left_tail;
6156 dispatch[OPEN][EXACTF] = compare_left_open;
6157 dispatch[CLOSE][EXACTF] = compare_left_tail;
6158 dispatch[IFMATCH][EXACTF] = compare_after_assertion;
6159 dispatch[UNLESSM][EXACTF] = compare_after_assertion;
6160 dispatch[MINMOD][EXACTF] = compare_left_tail;
6161 dispatch[LNBREAK][EXACTF] = compare_mismatch;
6162 dispatch[OPTIMIZED][EXACTF] = compare_left_tail;
6163
6164 dispatch[SUCCEED][EXACTFU] = compare_left_tail;
6165 dispatch[MBOL][EXACTFU] = compare_bol;
6166 dispatch[SBOL][EXACTFU] = compare_bol;
6167 dispatch[BOUND][EXACTFU] = compare_mismatch;
6168 dispatch[NBOUND][EXACTFU] = compare_mismatch;
6169 dispatch[REG_ANY][EXACTFU] = compare_mismatch;
6170 dispatch[SANY][EXACTFU] = compare_mismatch;
6171 dispatch[ANYOF][EXACTFU] = compare_anyof_exactf;
6172 dispatch[ANYOFD][EXACTFU] = compare_anyof_exactf;
6173 #ifdef RC_ANYOFR
6174 dispatch[ANYOFR][EXACTFU] = compare_anyofr_exactf;
6175 #endif
6176 dispatch[ANYOFM][EXACTFU] = compare_anyofm_exactf;
6177 dispatch[NANYOFM][EXACTFU] = compare_mismatch;
6178 dispatch[POSIXD][EXACTFU] = compare_mismatch;
6179 dispatch[POSIXU][EXACTFU] = compare_mismatch;
6180 dispatch[POSIXA][EXACTFU] = compare_mismatch;
6181 dispatch[NPOSIXD][EXACTFU] = compare_mismatch;
6182 dispatch[NPOSIXU][EXACTFU] = compare_mismatch;
6183 dispatch[NPOSIXA][EXACTFU] = compare_mismatch;
6184 dispatch[BRANCH][EXACTFU] = compare_left_branch;
6185 dispatch[EXACT][EXACTFU] = compare_exact_exactf;
6186 dispatch[EXACTF][EXACTFU] = compare_exactf_exactf;
6187 dispatch[EXACTFU][EXACTFU] = compare_exactf_exactf;
6188 dispatch[NOTHING][EXACTFU] = compare_left_tail;
6189 dispatch[STAR][EXACTFU] = compare_mismatch;
6190 dispatch[PLUS][EXACTFU] = compare_left_plus;
6191 dispatch[CURLY][EXACTFU] = compare_left_curly;
6192 dispatch[CURLYM][EXACTFU] = compare_left_curly;
6193 dispatch[CURLYX][EXACTFU] = compare_left_curly;
6194 dispatch[OPEN][EXACTFU] = compare_left_open;
6195 dispatch[CLOSE][EXACTFU] = compare_left_tail;
6196 dispatch[IFMATCH][EXACTFU] = compare_after_assertion;
6197 dispatch[UNLESSM][EXACTFU] = compare_after_assertion;
6198 dispatch[MINMOD][EXACTFU] = compare_left_tail;
6199 dispatch[LNBREAK][EXACTFU] = compare_mismatch;
6200 dispatch[OPTIMIZED][EXACTFU] = compare_left_tail;
6201
6202 #ifdef RC_EXACT_ONLY8
6203 dispatch[EXACT_ONLY8][EXACT_ONLY8] = compare_exact_exact;
6204 #endif
6205 #ifdef RC_EXACT_REQ8
6206 dispatch[EXACT_REQ8][EXACT_REQ8] = compare_exact_exact;
6207 #endif
6208
6209 for (i = 0; i < REGNODE_MAX; ++i)
6210 {
6211 dispatch[i][NOTHING] = compare_next;
6212 }
6213
6214 dispatch[SUCCEED][NOTHING] = compare_tails;
6215 dispatch[NOTHING][NOTHING] = compare_tails;
6216 dispatch[TAIL][NOTHING] = compare_tails;
6217 dispatch[WHILEM][NOTHING] = compare_tails;
6218 dispatch[CLOSE][NOTHING] = compare_tails;
6219 dispatch[MINMOD][NOTHING] = compare_tails;
6220 dispatch[OPTIMIZED][NOTHING] = compare_tails;
6221
6222 for (i = 0; i < REGNODE_MAX; ++i)
6223 {
6224 dispatch[i][TAIL] = compare_next;
6225 }
6226
6227 dispatch[SUCCEED][TAIL] = compare_tails;
6228 dispatch[NOTHING][TAIL] = compare_tails;
6229 dispatch[TAIL][TAIL] = compare_tails;
6230 dispatch[WHILEM][TAIL] = compare_tails;
6231 dispatch[CLOSE][TAIL] = compare_tails;
6232 dispatch[MINMOD][TAIL] = compare_tails;
6233 dispatch[OPTIMIZED][TAIL] = compare_tails;
6234
6235 for (i = 0; i < REGNODE_MAX; ++i)
6236 {
6237 dispatch[i][STAR] = compare_right_star;
6238 }
6239
6240 dispatch[SUCCEED][STAR] = compare_left_tail;
6241 dispatch[EOS][STAR] = compare_tails;
6242 dispatch[EOL][STAR] = compare_tails;
6243 dispatch[MEOL][STAR] = compare_tails;
6244 dispatch[SEOL][STAR] = compare_tails;
6245 dispatch[NOTHING][STAR] = compare_left_tail;
6246 dispatch[TAIL][STAR] = compare_left_tail;
6247 dispatch[STAR][STAR] = compare_repeat_star;
6248 dispatch[PLUS][STAR] = compare_repeat_star;
6249 dispatch[CURLY][STAR] = compare_curly_star;
6250 dispatch[CURLYM][STAR] = compare_curly_star;
6251 dispatch[CURLYX][STAR] = compare_curly_star;
6252 dispatch[WHILEM][STAR] = compare_left_tail;
6253 dispatch[OPEN][STAR] = compare_left_open;
6254 dispatch[CLOSE][STAR] = compare_left_tail;
6255 dispatch[IFMATCH][STAR] = compare_after_assertion;
6256 dispatch[UNLESSM][STAR] = compare_after_assertion;
6257 dispatch[MINMOD][STAR] = compare_left_tail;
6258 dispatch[OPTIMIZED][STAR] = compare_left_tail;
6259
6260 for (i = 0; i < REGNODE_MAX; ++i)
6261 {
6262 dispatch[i][PLUS] = compare_right_plus;
6263 }
6264
6265 dispatch[SUCCEED][PLUS] = compare_left_tail;
6266 dispatch[NOTHING][PLUS] = compare_left_tail;
6267 dispatch[TAIL][PLUS] = compare_left_tail;
6268 dispatch[PLUS][PLUS] = compare_plus_plus;
6269 dispatch[CURLY][PLUS] = compare_curly_plus;
6270 dispatch[CURLYM][PLUS] = compare_curly_plus;
6271 dispatch[CURLYX][PLUS] = compare_curly_plus;
6272 dispatch[WHILEM][PLUS] = compare_left_tail;
6273 dispatch[OPEN][PLUS] = compare_left_open;
6274 dispatch[CLOSE][PLUS] = compare_left_tail;
6275 dispatch[IFMATCH][PLUS] = compare_after_assertion;
6276 dispatch[UNLESSM][PLUS] = compare_after_assertion;
6277 dispatch[MINMOD][PLUS] = compare_left_tail;
6278 dispatch[OPTIMIZED][PLUS] = compare_left_tail;
6279
6280 for (i = 0; i < REGNODE_MAX; ++i)
6281 {
6282 dispatch[i][CURLY] = compare_right_curly;
6283 }
6284
6285 dispatch[SUCCEED][CURLY] = compare_left_tail;
6286 dispatch[NOTHING][CURLY] = compare_left_tail;
6287 dispatch[TAIL][CURLY] = compare_left_tail;
6288 dispatch[PLUS][CURLY] = compare_plus_curly;
6289 dispatch[CURLY][CURLY] = compare_curly_curly;
6290 dispatch[CURLYM][CURLY] = compare_curly_curly;
6291 dispatch[CURLYX][CURLY] = compare_curly_curly;
6292 dispatch[WHILEM][CURLY] = compare_left_tail;
6293 dispatch[OPEN][CURLY] = compare_left_open;
6294 dispatch[CLOSE][CURLY] = compare_left_tail;
6295 dispatch[IFMATCH][CURLY] = compare_after_assertion;
6296 dispatch[UNLESSM][CURLY] = compare_after_assertion;
6297 dispatch[SUSPEND][CURLY] = compare_suspend_curly;
6298 dispatch[MINMOD][CURLY] = compare_left_tail;
6299 dispatch[OPTIMIZED][CURLY] = compare_left_tail;
6300
6301 for (i = 0; i < REGNODE_MAX; ++i)
6302 {
6303 dispatch[i][CURLYM] = compare_right_curly;
6304 }
6305
6306 dispatch[SUCCEED][CURLYM] = compare_left_tail;
6307 dispatch[NOTHING][CURLYM] = compare_left_tail;
6308 dispatch[TAIL][CURLYM] = compare_left_tail;
6309 dispatch[PLUS][CURLYM] = compare_plus_curly;
6310 dispatch[CURLY][CURLYM] = compare_curly_curly;
6311 dispatch[CURLYM][CURLYM] = compare_curly_curly;
6312 dispatch[CURLYX][CURLYM] = compare_curly_curly;
6313 dispatch[WHILEM][CURLYM] = compare_left_tail;
6314 dispatch[OPEN][CURLYM] = compare_left_open;
6315 dispatch[CLOSE][CURLYM] = compare_left_tail;
6316 dispatch[IFMATCH][CURLYM] = compare_after_assertion;
6317 dispatch[UNLESSM][CURLYM] = compare_after_assertion;
6318 dispatch[SUSPEND][CURLYM] = compare_suspend_curly;
6319 dispatch[MINMOD][CURLYM] = compare_left_tail;
6320 dispatch[OPTIMIZED][CURLYM] = compare_left_tail;
6321
6322 for (i = 0; i < REGNODE_MAX; ++i)
6323 {
6324 dispatch[i][CURLYX] = compare_right_curly;
6325 }
6326
6327 dispatch[SUCCEED][CURLYX] = compare_left_tail;
6328 dispatch[NOTHING][CURLYX] = compare_left_tail;
6329 dispatch[TAIL][CURLYX] = compare_left_tail;
6330 dispatch[PLUS][CURLYX] = compare_plus_curly;
6331 dispatch[CURLY][CURLYX] = compare_curly_curly;
6332 dispatch[CURLYM][CURLYX] = compare_curly_curly;
6333 dispatch[CURLYX][CURLYX] = compare_curly_curly;
6334 dispatch[WHILEM][CURLYX] = compare_left_tail;
6335 dispatch[OPEN][CURLYX] = compare_left_open;
6336 dispatch[CLOSE][CURLYX] = compare_left_tail;
6337 dispatch[IFMATCH][CURLYX] = compare_after_assertion;
6338 dispatch[UNLESSM][CURLYX] = compare_after_assertion;
6339 dispatch[SUSPEND][CURLYX] = compare_suspend_curly;
6340 dispatch[MINMOD][CURLYX] = compare_left_tail;
6341 dispatch[OPTIMIZED][CURLYX] = compare_left_tail;
6342
6343 for (i = 0; i < REGNODE_MAX; ++i)
6344 {
6345 dispatch[i][WHILEM] = compare_next;
6346 }
6347
6348 dispatch[SUCCEED][WHILEM] = compare_tails;
6349 dispatch[NOTHING][WHILEM] = compare_tails;
6350 dispatch[TAIL][WHILEM] = compare_tails;
6351 dispatch[WHILEM][WHILEM] = compare_tails;
6352 dispatch[CLOSE][WHILEM] = compare_tails;
6353 dispatch[MINMOD][WHILEM] = compare_tails;
6354 dispatch[OPTIMIZED][WHILEM] = compare_tails;
6355
6356 for (i = 0; i < REGNODE_MAX; ++i)
6357 {
6358 dispatch[i][OPEN] = compare_right_open;
6359 }
6360
6361 dispatch[OPEN][OPEN] = compare_open_open;
6362
6363 for (i = 0; i < REGNODE_MAX; ++i)
6364 {
6365 dispatch[i][CLOSE] = compare_next;
6366 }
6367
6368 dispatch[SUCCEED][CLOSE] = compare_tails;
6369 dispatch[NOTHING][CLOSE] = compare_tails;
6370 dispatch[TAIL][CLOSE] = compare_tails;
6371 dispatch[WHILEM][CLOSE] = compare_tails;
6372 dispatch[CLOSE][CLOSE] = compare_tails;
6373 dispatch[MINMOD][CLOSE] = compare_tails;
6374 dispatch[OPTIMIZED][CLOSE] = compare_tails;
6375
6376 dispatch[SUCCEED][IFMATCH] = compare_left_tail;
6377 dispatch[MBOL][IFMATCH] = compare_bol;
6378 dispatch[SBOL][IFMATCH] = compare_bol;
6379 dispatch[BOUND][IFMATCH] = compare_mismatch;
6380 dispatch[NBOUND][IFMATCH] = compare_mismatch;
6381 dispatch[REG_ANY][IFMATCH] = compare_mismatch;
6382 dispatch[SANY][IFMATCH] = compare_mismatch;
6383 dispatch[ANYOF][IFMATCH] = compare_mismatch;
6384 dispatch[ANYOFD][IFMATCH] = compare_mismatch;
6385 #ifdef RC_ANYOFR
6386 dispatch[ANYOFR][IFMATCH] = compare_mismatch;
6387 #endif
6388 dispatch[ANYOFM][IFMATCH] = compare_mismatch;
6389 dispatch[NANYOFM][IFMATCH] = compare_mismatch;
6390 dispatch[POSIXD][IFMATCH] = compare_mismatch;
6391 dispatch[POSIXU][IFMATCH] = compare_mismatch;
6392 dispatch[POSIXA][IFMATCH] = compare_mismatch;
6393 dispatch[NPOSIXD][IFMATCH] = compare_mismatch;
6394 dispatch[NPOSIXU][IFMATCH] = compare_mismatch;
6395 dispatch[NPOSIXA][IFMATCH] = compare_mismatch;
6396 dispatch[BRANCH][IFMATCH] = compare_mismatch;
6397 dispatch[EXACT][IFMATCH] = compare_mismatch;
6398 dispatch[EXACTF][IFMATCH] = compare_mismatch;
6399 dispatch[EXACTFU][IFMATCH] = compare_mismatch;
6400 dispatch[NOTHING][IFMATCH] = compare_left_tail;
6401 dispatch[TAIL][IFMATCH] = compare_left_tail;
6402 dispatch[STAR][IFMATCH] = compare_mismatch;
6403 dispatch[PLUS][IFMATCH] = compare_mismatch;
6404 dispatch[CURLY][IFMATCH] = compare_mismatch;
6405 dispatch[CURLYM][IFMATCH] = compare_mismatch;
6406 dispatch[CURLYX][IFMATCH] = compare_mismatch;
6407 dispatch[WHILEM][IFMATCH] = compare_left_tail;
6408 dispatch[OPEN][IFMATCH] = compare_left_open;
6409 dispatch[CLOSE][IFMATCH] = compare_left_tail;
6410 dispatch[IFMATCH][IFMATCH] = compare_positive_assertions;
6411 dispatch[UNLESSM][IFMATCH] = compare_mismatch;
6412 dispatch[MINMOD][IFMATCH] = compare_left_tail;
6413 dispatch[LNBREAK][IFMATCH] = compare_mismatch;
6414 dispatch[OPTIMIZED][IFMATCH] = compare_left_tail;
6415
6416 dispatch[SUCCEED][UNLESSM] = compare_left_tail;
6417 dispatch[MBOL][UNLESSM] = compare_bol;
6418 dispatch[SBOL][UNLESSM] = compare_bol;
6419 dispatch[BOUND][UNLESSM] = compare_mismatch;
6420 dispatch[NBOUND][UNLESSM] = compare_mismatch;
6421 dispatch[REG_ANY][UNLESSM] = compare_mismatch;
6422 dispatch[SANY][UNLESSM] = compare_mismatch;
6423 dispatch[ANYOF][UNLESSM] = compare_mismatch;
6424 dispatch[ANYOFD][UNLESSM] = compare_mismatch;
6425 #ifdef RC_ANYOFR
6426 dispatch[ANYOFR][UNLESSM] = compare_mismatch;
6427 #endif
6428 dispatch[ANYOFM][UNLESSM] = compare_mismatch;
6429 dispatch[NANYOFM][UNLESSM] = compare_mismatch;
6430 dispatch[POSIXD][UNLESSM] = compare_mismatch;
6431 dispatch[POSIXU][UNLESSM] = compare_mismatch;
6432 dispatch[POSIXA][UNLESSM] = compare_mismatch;
6433 dispatch[NPOSIXD][UNLESSM] = compare_mismatch;
6434 dispatch[NPOSIXU][UNLESSM] = compare_mismatch;
6435 dispatch[NPOSIXA][UNLESSM] = compare_mismatch;
6436 dispatch[BRANCH][UNLESSM] = compare_mismatch;
6437 dispatch[EXACT][UNLESSM] = compare_mismatch;
6438 dispatch[EXACTF][UNLESSM] = compare_mismatch;
6439 dispatch[EXACTFU][UNLESSM] = compare_mismatch;
6440 dispatch[NOTHING][UNLESSM] = compare_left_tail;
6441 dispatch[TAIL][UNLESSM] = compare_left_tail;
6442 dispatch[STAR][UNLESSM] = compare_mismatch;
6443 dispatch[PLUS][UNLESSM] = compare_mismatch;
6444 dispatch[CURLY][UNLESSM] = compare_mismatch;
6445 dispatch[CURLYM][UNLESSM] = compare_mismatch;
6446 dispatch[CURLYX][UNLESSM] = compare_mismatch;
6447 dispatch[WHILEM][UNLESSM] = compare_left_tail;
6448 dispatch[OPEN][UNLESSM] = compare_left_open;
6449 dispatch[CLOSE][UNLESSM] = compare_left_tail;
6450 dispatch[IFMATCH][UNLESSM] = compare_mismatch;
6451 dispatch[UNLESSM][UNLESSM] = compare_negative_assertions;
6452 dispatch[MINMOD][UNLESSM] = compare_left_tail;
6453 dispatch[LNBREAK][UNLESSM] = compare_mismatch;
6454 dispatch[OPTIMIZED][UNLESSM] = compare_left_tail;
6455
6456 dispatch[SUSPEND][SUSPEND] = compare_subexpressions;
6457
6458 for (i = 0; i < REGNODE_MAX; ++i)
6459 {
6460 dispatch[i][MINMOD] = compare_next;
6461 }
6462
6463 dispatch[SUCCEED][MINMOD] = compare_tails;
6464 dispatch[NOTHING][MINMOD] = compare_tails;
6465 dispatch[TAIL][MINMOD] = compare_tails;
6466 dispatch[WHILEM][MINMOD] = compare_tails;
6467 dispatch[CLOSE][MINMOD] = compare_tails;
6468 dispatch[MINMOD][MINMOD] = compare_tails;
6469 dispatch[OPTIMIZED][MINMOD] = compare_tails;
6470
6471 dispatch[SUCCEED][LNBREAK] = compare_left_tail;
6472 dispatch[SBOL][LNBREAK] = compare_bol;
6473 dispatch[MBOL][LNBREAK] = compare_bol;
6474 dispatch[BOUND][LNBREAK] = compare_mismatch;
6475 dispatch[NBOUND][LNBREAK] = compare_mismatch;
6476 dispatch[REG_ANY][LNBREAK] = compare_mismatch;
6477 dispatch[SANY][LNBREAK] = compare_mismatch;
6478 dispatch[ANYOF][LNBREAK] = compare_anyof_lnbreak;
6479 dispatch[ANYOFD][LNBREAK] = compare_anyof_lnbreak;
6480 #ifdef RC_ANYOFR
6481 dispatch[ANYOFR][LNBREAK] = compare_mismatch;
6482 #endif
6483 dispatch[ANYOFM][LNBREAK] = compare_mismatch;
6484 dispatch[NANYOFM][LNBREAK] = compare_mismatch;
6485 dispatch[POSIXD][LNBREAK] = compare_posix_lnbreak;
6486 dispatch[POSIXU][LNBREAK] = compare_posix_lnbreak;
6487 dispatch[POSIXA][LNBREAK] = compare_posix_lnbreak;
6488 dispatch[NPOSIXD][LNBREAK] = compare_mismatch;
6489 dispatch[NPOSIXU][LNBREAK] = compare_mismatch;
6490 dispatch[NPOSIXA][LNBREAK] = compare_mismatch;
6491 dispatch[BRANCH][LNBREAK] = compare_left_branch;
6492 dispatch[EXACT][LNBREAK] = compare_exact_lnbreak;
6493 dispatch[EXACTFU][LNBREAK] = compare_exact_lnbreak;
6494 dispatch[NOTHING][LNBREAK] = compare_left_tail;
6495 dispatch[TAIL][LNBREAK] = compare_left_tail;
6496 dispatch[STAR][LNBREAK] = compare_mismatch;
6497 dispatch[PLUS][LNBREAK] = compare_left_plus;
6498 dispatch[CURLY][LNBREAK] = compare_left_curly;
6499 dispatch[CURLYM][LNBREAK] = compare_left_curly;
6500 dispatch[CURLYX][LNBREAK] = compare_left_curly;
6501 dispatch[WHILEM][LNBREAK] = compare_left_tail;
6502 dispatch[OPEN][LNBREAK] = compare_left_open;
6503 dispatch[CLOSE][LNBREAK] = compare_left_tail;
6504 dispatch[IFMATCH][LNBREAK] = compare_after_assertion;
6505 dispatch[UNLESSM][LNBREAK] = compare_after_assertion;
6506 dispatch[MINMOD][LNBREAK] = compare_left_tail;
6507 dispatch[LNBREAK][LNBREAK] = compare_tails;
6508
6509 for (i = 0; i < REGNODE_MAX; ++i)
6510 {
6511 dispatch[i][OPTIMIZED] = compare_next;
6512 }
6513
6514 dispatch[SUCCEED][OPTIMIZED] = compare_tails;
6515 dispatch[NOTHING][OPTIMIZED] = compare_tails;
6516 dispatch[TAIL][OPTIMIZED] = compare_tails;
6517 dispatch[WHILEM][OPTIMIZED] = compare_tails;
6518 dispatch[CLOSE][OPTIMIZED] = compare_tails;
6519 dispatch[MINMOD][OPTIMIZED] = compare_tails;
6520 dispatch[OPTIMIZED][OPTIMIZED] = compare_tails;
6521 }
6522