1 /*
2 tre-match-utils.h - TRE matcher helper definitions
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
/* Views the enclosing function's `string' variable as a pointer to a
   const tre_str_source. */
#define str_source ((const tre_str_source *)(string))
10
11 #ifdef TRE_WCHAR
12
13 #ifdef TRE_MULTIBYTE
14
15 /* Wide character and multibyte support. */
16
/* GET_NEXT_WCHAR(): step the input cursor forward by one character.

   This is a statement macro that works on variables of the enclosing
   function instead of taking arguments: `type', `pos', `len', `prev_c',
   `next_c', `str_byte', `str_wide', `pos_add_next', `mbstate',
   `str_user_end' and (through `str_source') `string'.

   The previous `next_c' is first saved in `prev_c'.  Then, by `type':
     STR_BYTE - `pos' is incremented and the next byte is read as an
                unsigned char.
     STR_WIDE - `pos' is incremented and the next wide character is read.
     STR_MBS  - `pos' advances by `pos_add_next' and one character is
                decoded from `str_byte' with tre_mbrtowc().
     STR_USER - `pos' advances by `pos_add_next' and the character is
                requested from str_source->get_next_char(); its return
                value is stored in `str_user_end'.
   For STR_BYTE and STR_WIDE, when `len' is non-negative and `pos' has
   reached it, `next_c' is set to NUL and the source pointer is left alone.

   NOTE: when tre_mbrtowc() returns (size_t)-1 or (size_t)-2 the macro
   executes `return REG_NOMATCH;' in the enclosing function. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
                   ? '\0' \
                   : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_WIDE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++; \
      } \
    else if (type == STR_MBS) \
      { \
        pos += pos_add_next; \
        if (str_byte != NULL) \
          { \
            /* With a negative `len', offer at most 32 bytes to the decoder. */ \
            int mbs_limit = (len >= 0) ? (len - pos) : 32; \
            if (mbs_limit > 0) \
              { \
                size_t mbs_used = tre_mbrtowc(&next_c, str_byte, \
                                              (size_t)mbs_limit, &mbstate); \
                if (mbs_used == (size_t)-1 || mbs_used == (size_t)-2) \
                  return REG_NOMATCH; \
                if (mbs_used == 0 && len >= 0) \
                  { \
                    /* Decoder returned 0 while the length is known */ \
                    /* (presumably a NUL inside the counted string; */ \
                    /* confirm against tre_mbrtowc): step one byte. */ \
                    next_c = 0; \
                    pos_add_next = 1; \
                    str_byte++; \
                  } \
                else \
                  { \
                    pos_add_next = (unsigned int)mbs_used; \
                    str_byte += mbs_used; \
                  } \
              } \
            else \
              { \
                /* Nothing left before `len'. */ \
                next_c = L'\0'; \
                pos_add_next = 1; \
              } \
          } \
        else \
          next_c = L'\0'; \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(/*CONSTCOND*/(void)0,0)
80
81 #else /* !TRE_MULTIBYTE */
82
83 /* Wide character support, no multibyte support. */
84
/* GET_NEXT_WCHAR(): step the input cursor forward by one character.

   This is a statement macro that works on variables of the enclosing
   function instead of taking arguments: `type', `pos', `len', `prev_c',
   `next_c', `str_byte', `str_wide', `pos_add_next', `str_user_end' and
   (through `str_source') `string'.

   The previous `next_c' is first saved in `prev_c'.  Then, by `type':
     STR_BYTE - `pos' is incremented and the next byte is read as an
                unsigned char.
     STR_WIDE - `pos' is incremented and the next wide character is read.
     STR_USER - `pos' advances by `pos_add_next' and the character is
                requested from str_source->get_next_char(); its return
                value is stored in `str_user_end'.
   For STR_BYTE and STR_WIDE, when `len' is non-negative and `pos' has
   reached it, `next_c' is set to NUL and the source pointer is left alone.
   Any other `type' value only updates `prev_c'. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
                   ? '\0' \
                   : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_WIDE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++; \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(/*CONSTCOND*/(void)0,0)
111
112 #endif /* !TRE_MULTIBYTE */
113
114 #else /* !TRE_WCHAR */
115
116 /* No wide character or multibyte support. */
117
/* GET_NEXT_WCHAR(): step the input cursor forward by one character.

   This is a statement macro that works on variables of the enclosing
   function instead of taking arguments: `type', `pos', `len', `prev_c',
   `next_c', `str_byte', `pos_add_next', `str_user_end' and (through
   `str_source') `string'.

   The previous `next_c' is first saved in `prev_c'.  Then, by `type':
     STR_BYTE - `pos' is incremented and the next byte is read as an
                unsigned char; when `len' is non-negative and `pos' has
                reached it, `next_c' is set to NUL and `str_byte' is left
                alone.
     STR_USER - `pos' advances by `pos_add_next' and the character is
                requested from str_source->get_next_char(); its return
                value is stored in `str_user_end'.
   Any other `type' value only updates `prev_c'. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
                   ? '\0' \
                   : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(/*CONSTCOND*/(void)0,0)
136
137 #endif /* !TRE_WCHAR */
138
139
140
/* Evaluates to 1 when `c' is an underscore or tre_isalnum(c) is nonzero,
   and to 0 otherwise.  `c' may be evaluated twice. */
#define IS_WORD_CHAR(c) (((c) == L'_') || (tre_isalnum(c) != 0))
142
/* CHECK_ASSERTIONS(assertions): evaluates to 1 when at least one of the
   assertions requested in the bit mask `assertions' does NOT hold at the
   current position, and to 0 when all requested assertions hold.

   Reads these variables of the enclosing function: `pos', `prev_c',
   `next_c', `reg_notbol', `reg_noteol' and `reg_newline'.

   A requested assertion is reported as failed when:
     ASSERT_AT_BOL    - (pos > 0 or reg_notbol) and
                        (prev_c is not '\n' or reg_newline is off)
     ASSERT_AT_EOL    - (next_c is not NUL or reg_noteol) and
                        (next_c is not '\n' or reg_newline is off)
     ASSERT_AT_BOW    - prev_c is a word char, or next_c is not
     ASSERT_AT_EOW    - prev_c is not a word char, or next_c is
     ASSERT_AT_WB     - pos != 0, next_c is not NUL, and prev_c/next_c are
                        both word chars or both non-word chars
     ASSERT_AT_WB_NEG - pos == 0, or next_c is NUL, or exactly one of
                        prev_c/next_c is a word char

   The parameter is parenthesized at every use so that a compound argument
   such as `a | b' is masked as a whole.  It may be evaluated up to six
   times, so it must be free of side effects. */
#define CHECK_ASSERTIONS(assertions) \
  ((((assertions) & ASSERT_AT_BOL) \
    && (pos > 0 || reg_notbol) \
    && (prev_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_EOL) \
       && (next_c != L'\0' || reg_noteol) \
       && (next_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_BOW) \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_EOW) \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB) \
       && (pos != 0 && next_c != L'\0' \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB_NEG) \
       && (pos == 0 || next_c == L'\0' \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
160
/* CHECK_CHAR_CLASSES(trans_i, tnfa, eflags): evaluates to 1 when the
   character-class assertions of transition `trans_i' reject `prev_c'
   (a variable of the enclosing function), and to 0 otherwise.

   `prev_c' is rejected when:
     - ASSERT_CHAR_CLASS is set, REG_ICASE is not set in tnfa->cflags, and
       prev_c is not in trans_i->u.class; or
     - ASSERT_CHAR_CLASS is set, REG_ICASE is set, and neither the lower
       case nor the upper case form of prev_c is in trans_i->u.class; or
     - ASSERT_CHAR_CLASS_NEG is set and tre_neg_char_classes_match()
       reports a match against trans_i->neg_classes.

   `trans_i' and `tnfa' are pointer expressions; they are parenthesized at
   every use so that arguments such as `&arr[i]' or `p + 1' bind correctly
   before `->'.  Both may be evaluated several times.  `eflags' is accepted
   but not used. */
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS) \
    && !((tnfa)->cflags & REG_ICASE) \
    && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class)) \
   || (((trans_i)->assertions & ASSERT_CHAR_CLASS) \
       && ((tnfa)->cflags & REG_ICASE) \
       && !tre_isctype(tre_tolower((tre_cint_t)prev_c), (trans_i)->u.class) \
       && !tre_isctype(tre_toupper((tre_cint_t)prev_c), (trans_i)->u.class)) \
   || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG) \
       && tre_neg_char_classes_match((trans_i)->neg_classes, \
                                     (tre_cint_t)prev_c, \
                                     (tnfa)->cflags & REG_ICASE)))
172
173
174
175
176 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
177 inline static int
tre_tag_order(int num_tags,tre_tag_direction_t * tag_directions,int * t1,int * t2)178 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
179 int *t1, int *t2)
180 {
181 int i;
182 for (i = 0; i < num_tags; i++)
183 {
184 if (tag_directions[i] == TRE_TAG_MINIMIZE)
185 {
186 if (t1[i] < t2[i])
187 return 1;
188 if (t1[i] > t2[i])
189 return 0;
190 }
191 else
192 {
193 if (t1[i] > t2[i])
194 return 1;
195 if (t1[i] < t2[i])
196 return 0;
197 }
198 }
199 /* assert(0);*/
200 return 0;
201 }
202
203 inline static int
tre_neg_char_classes_match(tre_ctype_t * classes,tre_cint_t wc,int icase)204 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
205 {
206 DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
207 while (*classes != (tre_ctype_t)0)
208 if ((!icase && tre_isctype(wc, *classes))
209 || (icase && (tre_isctype(tre_toupper(wc), *classes)
210 || tre_isctype(tre_tolower(wc), *classes))))
211 return 1; /* Match. */
212 else
213 classes++;
214 return 0; /* No match. */
215 }
216