1 /*
2   tre-match-utils.h - TRE matcher helper definitions
3 
4   Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
5 
6   This library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10 
11   This library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15 
16   You should have received a copy of the GNU Lesser General Public
17   License along with this library; if not, write to the Free Software
18   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 
20 */
21 
/* Shorthand used by GET_NEXT_WCHAR(): treats the identifier `string' that
   is visible at the point of expansion as a pointer to tre_str_source. */
#define str_source ((tre_str_source *)(string))
23 
24 #ifdef TRE_WCHAR
25 
26 #ifdef TRE_MULTIBYTE
27 
/* Wide character and multibyte support.

   GET_NEXT_WCHAR() steps the input forward by one character.  It expands
   in place and reads and writes these names from the expanding scope:
   type, pos, len, prev_c, next_c, str_byte, str_wide, mbstate,
   pos_add_next, str_user_end and str_source.

   Every expansion first copies next_c into prev_c, then loads next_c:
     - STR_BYTE: one byte from str_byte, read as unsigned char.
     - STR_WIDE: one element from str_wide.
     - STR_MBS:  one character decoded from str_byte by tre_mbrtowc().
     - STR_USER: whatever str_source->get_next_char() stores; the
       callback's return value is kept in str_user_end.
   For byte and wide input with len >= 0, next_c becomes NUL as soon as
   pos reaches len, and the buffer is not read.

   NOTE: in the STR_MBS case the macro executes `return REG_NOMATCH;' in
   the enclosing function when tre_mbrtowc() yields (size_t)-1 or
   (size_t)-2. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
          ? '\0' : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_WIDE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++; \
      } \
    else if (type == STR_MBS) \
      { \
        /* pos moves by the size recorded for the previous character. */ \
        pos += pos_add_next; \
        if (str_byte == NULL) \
          next_c = L'\0'; \
        else \
          { \
            /* Limit given to tre_mbrtowc(): rest of a known length, else 32. */ \
            int avail = (len >= 0) ? len - pos : 32; \
            if (avail <= 0) \
              { \
                next_c = L'\0'; \
                pos_add_next = 1; \
              } \
            else \
              { \
                size_t nbytes = tre_mbrtowc(&next_c, str_byte, avail, \
                                            &mbstate); \
                if (nbytes == (size_t)-1 || nbytes == (size_t)-2) \
                  return REG_NOMATCH; \
                if (nbytes == 0 && len >= 0) \
                  { \
                    /* Zero result inside a counted string: skip one byte. */ \
                    pos_add_next = 1; \
                    next_c = 0; \
                    str_byte++; \
                  } \
                else \
                  { \
                    pos_add_next = nbytes; \
                    str_byte += nbytes; \
                  } \
              } \
          } \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(0)
93 
94 #else /* !TRE_MULTIBYTE */
95 
/* Wide character support, no multibyte support.

   GET_NEXT_WCHAR() steps the input forward by one character.  It expands
   in place and uses these names from the expanding scope: type, pos,
   len, prev_c, next_c, str_byte, str_wide, pos_add_next, str_user_end
   and str_source.

   Every expansion first copies next_c into prev_c, then loads next_c:
     - STR_BYTE: one byte from str_byte, read as unsigned char.
     - STR_WIDE: one element from str_wide.
     - STR_USER: whatever str_source->get_next_char() stores; the
       callback's return value is kept in str_user_end.
   For byte and wide input with len >= 0, next_c becomes NUL as soon as
   pos reaches len, and the buffer is not read. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
          ? '\0' : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_WIDE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++; \
      } \
    else if (type == STR_USER) \
      { \
        /* pos moves by the amount the callback reported last time. */ \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(0)
124 
125 #endif /* !TRE_MULTIBYTE */
126 
127 #else /* !TRE_WCHAR */
128 
/* No wide character or multibyte support.

   GET_NEXT_WCHAR() steps the input forward by one character.  It expands
   in place and uses these names from the expanding scope: type, pos,
   len, prev_c, next_c, str_byte, pos_add_next, str_user_end and
   str_source.

   Every expansion first copies next_c into prev_c, then loads next_c:
     - STR_BYTE: one byte from str_byte, read as unsigned char; with
       len >= 0, next_c becomes NUL as soon as pos reaches len and the
       buffer is not read.
     - STR_USER: whatever str_source->get_next_char() stores; the
       callback's return value is kept in str_user_end. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        next_c = (len >= 0 && pos >= len) \
          ? '\0' : (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_USER) \
      { \
        /* pos moves by the amount the callback reported last time. */ \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(0)
149 
150 #endif /* !TRE_WCHAR */
151 
152 
153 
/* Nonzero when C is a word constituent: an underscore or a character
   accepted by tre_isalnum(). */
#define IS_WORD_CHAR(c)	 ((c) == L'_' || tre_isalnum(c))

/* Evaluates to nonzero when at least one assertion requested in the bit
   mask ASSERTIONS is violated at the current position, and to zero when
   every requested assertion holds.

   The macro expands in place and reads these names from the expanding
   scope: pos, prev_c, next_c, reg_notbol, reg_noteol and reg_newline.
   prev_c and next_c are the characters on either side of the position.

   Violation conditions, one per flag:
     ASSERT_AT_BOL     not at position 0 (or reg_notbol is set), and not
                       right after a newline while reg_newline is set.
     ASSERT_AT_EOL     next_c is not NUL (or reg_noteol is set), and not
                       right before a newline while reg_newline is set.
     ASSERT_AT_BOW     prev_c is a word character or next_c is not.
     ASSERT_AT_EOW     prev_c is not a word character or next_c is.
     ASSERT_AT_WB      strictly inside the input with prev_c and next_c
                       of the same word/non-word kind.
     ASSERT_AT_WB_NEG  at position 0, before NUL, or prev_c and next_c
                       of different kinds.

   ASSERTIONS is parenthesized at every use so that a compound mask
   expression (for example `a | b') is tested as a whole.  The
   beginning-of-word test applies at position 0 as well, so it is
   reported as violated there when next_c is not a word character. */
#define CHECK_ASSERTIONS(assertions) \
  ((((assertions) & ASSERT_AT_BOL) \
    && (pos > 0 || reg_notbol) \
    && (prev_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_EOL) \
       && (next_c != L'\0' || reg_noteol) \
       && (next_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_BOW) \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_EOW) \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB) \
       && (pos != 0 && next_c != L'\0' \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB_NEG) \
       && (pos == 0 || next_c == L'\0' \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
173 
174 
175 
176 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
177 static int
tre_tag_order(int num_tags,tre_tag_direction_t * tag_directions,int * t1,int * t2)178 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
179 	      int *t1, int *t2)
180 {
181   int i;
182   for (i = 0; i < num_tags; i++)
183     {
184       if (tag_directions[i] == TRE_TAG_MINIMIZE)
185 	{
186 	  if (t1[i] < t2[i])
187 	    return 1;
188 	  if (t1[i] > t2[i])
189 	    return 0;
190 	}
191       else
192 	{
193 	  if (t1[i] > t2[i])
194 	    return 1;
195 	  if (t1[i] < t2[i])
196 	    return 0;
197 	}
198     }
199   /*  assert(0);*/
200   return 0;
201 }
202 
203 static int
tre_neg_char_classes_match(tre_ctype_t * classes,tre_cint_t wc,int icase)204 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
205 {
206   DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
207   while (*classes != (tre_ctype_t)0)
208     if ((!icase && tre_isctype(wc, *classes))
209 	|| (icase && (tre_isctype(tre_toupper(wc), *classes)
210 		      || tre_isctype(tre_tolower(wc), *classes))))
211       return 1; /* Match. */
212     else
213       classes++;
214   return 0; /* No match. */
215 }
216