1 /*
2 tre-match-utils.h - TRE matcher helper definitions
3
4 Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
20 */
21
/* Views the `string' variable of the expanding scope as a pointer to a
   tre_str_source. */
#define str_source ((tre_str_source *)(string))
23
24 #ifdef TRE_WCHAR
25
26 #ifdef TRE_MULTIBYTE
27
28 /* Wide character and multibyte support. */
29
/* GET_NEXT_WCHAR(): step the input one character forward (build with wide
   character and multibyte support).

   Copies `next_c' into `prev_c' and then loads the following character
   into `next_c', dispatching on `type'.  The macro operates on names taken
   from the expanding scope: type, pos, len, str_byte, str_wide,
   pos_add_next, mbstate, str_user_end and (through `str_source') string.

   When `len' is non-negative, reaching position `len' produces a zero
   character instead of reading further.  In the STR_MBS branch, a
   (size_t)-1 or (size_t)-2 result from tre_mbrtowc() makes the ENCLOSING
   FUNCTION return REG_NOMATCH. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
                   ? '\0' : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_WIDE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++; \
      } \
    else if (type == STR_MBS) \
      { \
        pos += pos_add_next; \
        if (str_byte == NULL) \
          next_c = L'\0'; \
        else \
          { \
            /* Bytes offered to the decoder: the rest of a counted \
               string, or 32 when `len' is negative. */ \
            int avail = (len >= 0) ? (len - pos) : 32; \
            if (avail <= 0) \
              { \
                next_c = L'\0'; \
                pos_add_next = 1; \
              } \
            else \
              { \
                size_t nbytes = tre_mbrtowc(&next_c, str_byte, \
                                            (size_t)avail, &mbstate); \
                if (nbytes == (size_t)-1 || nbytes == (size_t)-2) \
                  return REG_NOMATCH; \
                if (nbytes == 0 && len >= 0) \
                  { \
                    /* Zero result inside a counted string: report a \
                       zero character and step over exactly one byte. */ \
                    next_c = 0; \
                    nbytes = 1; \
                  } \
                pos_add_next = nbytes; \
                str_byte += nbytes; \
              } \
          } \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(0)
93
94 #else /* !TRE_MULTIBYTE */
95
96 /* Wide character support, no multibyte support. */
97
/* GET_NEXT_WCHAR(): step the input one character forward (build with wide
   character support but without multibyte support).

   Copies `next_c' into `prev_c' and then loads the following character
   into `next_c', dispatching on `type'.  The macro operates on names taken
   from the expanding scope: type, pos, len, str_byte, str_wide,
   pos_add_next, str_user_end and (through `str_source') string.

   When `len' is non-negative, reaching position `len' produces a zero
   character instead of reading further. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
                   ? '\0' : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_WIDE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++; \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(0)
124
125 #endif /* !TRE_MULTIBYTE */
126
127 #else /* !TRE_WCHAR */
128
129 /* No wide character or multibyte support. */
130
/* GET_NEXT_WCHAR(): step the input one character forward (build without
   wide character or multibyte support).

   Copies `next_c' into `prev_c' and then loads the following character
   into `next_c', dispatching on `type'.  The macro operates on names taken
   from the expanding scope: type, pos, len, str_byte, pos_add_next,
   str_user_end and (through `str_source') string.

   When `len' is non-negative, reaching position `len' produces a zero
   character instead of reading further. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
                   ? '\0' : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(0)
149
150 #endif /* !TRE_WCHAR */
151
152
153
/* Evaluates to 1 when `c' is an underscore or tre_isalnum(c) is non-zero,
   and to 0 otherwise.  `c' may be expanded twice. */
#define IS_WORD_CHAR(c) (((c) == L'_') ? 1 : (tre_isalnum(c) != 0))
155
/* CHECK_ASSERTIONS(assertions): evaluates to non-zero when at least one of
   the assertion bits set in `assertions' has its failure condition met at
   the current position, and to zero otherwise.

   The macro reads these names from the expanding scope: pos, prev_c,
   next_c, reg_notbol, reg_noteol and reg_newline.  Per bit, the failure
   condition is:

     ASSERT_AT_BOL     (pos > 0 or reg_notbol) and
                       (prev_c is not '\n' or reg_newline is off)
     ASSERT_AT_EOL     (next_c is not '\0' or reg_noteol) and
                       (next_c is not '\n' or reg_newline is off)
     ASSERT_AT_BOW     pos > 0 and (prev_c is a word character or next_c
                       is not)
     ASSERT_AT_EOW     prev_c is not a word character or next_c is one
     ASSERT_AT_WB      pos != 0, next_c is not '\0', and prev_c and next_c
                       agree on being word characters
     ASSERT_AT_WB_NEG  pos == 0, or next_c is '\0', or prev_c and next_c
                       disagree on being word characters

   The argument is parenthesized at every use so that expressions such as
   `a | b' or `cond ? a : 0' are masked as a whole.  It is expanded once per
   clause, so it must not have side effects. */
#define CHECK_ASSERTIONS(assertions) \
  ((((assertions) & ASSERT_AT_BOL) \
    && (pos > 0 || reg_notbol) \
    && (prev_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_EOL) \
       && (next_c != L'\0' || reg_noteol) \
       && (next_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_BOW) \
       && (pos > 0 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))) \
   || (((assertions) & ASSERT_AT_EOW) \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB) \
       && (pos != 0 && next_c != L'\0' \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB_NEG) \
       && (pos == 0 || next_c == L'\0' \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
173
174
175
176 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
177 static int
tre_tag_order(int num_tags,tre_tag_direction_t * tag_directions,int * t1,int * t2)178 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
179 int *t1, int *t2)
180 {
181 int i;
182 for (i = 0; i < num_tags; i++)
183 {
184 if (tag_directions[i] == TRE_TAG_MINIMIZE)
185 {
186 if (t1[i] < t2[i])
187 return 1;
188 if (t1[i] > t2[i])
189 return 0;
190 }
191 else
192 {
193 if (t1[i] > t2[i])
194 return 1;
195 if (t1[i] < t2[i])
196 return 0;
197 }
198 }
199 /* assert(0);*/
200 return 0;
201 }
202
203 static int
tre_neg_char_classes_match(tre_ctype_t * classes,tre_cint_t wc,int icase)204 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
205 {
206 DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
207 while (*classes != (tre_ctype_t)0)
208 if ((!icase && tre_isctype(wc, *classes))
209 || (icase && (tre_isctype(tre_toupper(wc), *classes)
210 || tre_isctype(tre_tolower(wc), *classes))))
211 return 1; /* Match. */
212 else
213 classes++;
214 return 0; /* No match. */
215 }
216