xref: /freebsd/contrib/less/pattern.c (revision 0e6acb26)
1 /*
2  * Copyright (C) 1984-2015  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 #include "pattern.h"
16 
17 extern int caseless;
18 
19 /*
20  * Compile a search pattern, for future use by match_pattern.
21  */
22 	static int
23 compile_pattern2(char *pattern, int search_type, void **comp_pattern, int show_error)
24 {
25 	if (search_type & SRCH_NO_REGEX)
26 		return (0);
27   {
28 #if HAVE_GNU_REGEX
29 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30 		ecalloc(1, sizeof(struct re_pattern_buffer));
31 	struct re_pattern_buffer **pcomp =
32 		(struct re_pattern_buffer **) comp_pattern;
33 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
34 	if (re_compile_pattern(pattern, strlen(pattern), comp))
35 	{
36 		free(comp);
37 		if (show_error)
38 			error("Invalid pattern", NULL_PARG);
39 		return (-1);
40 	}
41 	if (*pcomp != NULL)
42 		regfree(*pcomp);
43 	*pcomp = comp;
44 #endif
45 #if HAVE_POSIX_REGCOMP
46 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
47 	regex_t **pcomp = (regex_t **) comp_pattern;
48 	if (regcomp(comp, pattern, REGCOMP_FLAG))
49 	{
50 		free(comp);
51 		if (show_error)
52 			error("Invalid pattern", NULL_PARG);
53 		return (-1);
54 	}
55 	if (*pcomp != NULL)
56 		regfree(*pcomp);
57 	*pcomp = comp;
58 #endif
59 #if HAVE_PCRE
60 	pcre *comp;
61 	pcre **pcomp = (pcre **) comp_pattern;
62 	constant char *errstring;
63 	int erroffset;
64 	PARG parg;
65 	comp = pcre_compile(pattern, 0,
66 			&errstring, &erroffset, NULL);
67 	if (comp == NULL)
68 	{
69 		parg.p_string = (char *) errstring;
70 		if (show_error)
71 			error("%s", &parg);
72 		return (-1);
73 	}
74 	*pcomp = comp;
75 #endif
76 #if HAVE_RE_COMP
77 	PARG parg;
78 	int *pcomp = (int *) comp_pattern;
79 	if ((parg.p_string = re_comp(pattern)) != NULL)
80 	{
81 		if (show_error)
82 			error("%s", &parg);
83 		return (-1);
84 	}
85 	*pcomp = 1;
86 #endif
87 #if HAVE_REGCMP
88 	char *comp;
89 	char **pcomp = (char **) comp_pattern;
90 	if ((comp = regcmp(pattern, 0)) == NULL)
91 	{
92 		if (show_error)
93 			error("Invalid pattern", NULL_PARG);
94 		return (-1);
95 	}
96 	if (pcomp != NULL)
97 		free(*pcomp);
98 	*pcomp = comp;
99 #endif
100 #if HAVE_V8_REGCOMP
101 	struct regexp *comp;
102 	struct regexp **pcomp = (struct regexp **) comp_pattern;
103 	reg_show_error = show_error;
104 	comp = regcomp(pattern);
105 	reg_show_error = 1;
106 	if (comp == NULL)
107 	{
108 		/*
109 		 * regcomp has already printed an error message
110 		 * via regerror().
111 		 */
112 		return (-1);
113 	}
114 	if (*pcomp != NULL)
115 		free(*pcomp);
116 	*pcomp = comp;
117 #endif
118   }
119 	return (0);
120 }
121 
122 /*
123  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
124  */
125 	public int
126 compile_pattern(char *pattern, int search_type, void **comp_pattern)
127 {
128 	char *cvt_pattern;
129 	int result;
130 
131 	if (caseless != OPT_ONPLUS)
132 		cvt_pattern = pattern;
133 	else
134 	{
135 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
136 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
137 	}
138 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1);
139 	if (cvt_pattern != pattern)
140 		free(cvt_pattern);
141 	return (result);
142 }
143 
144 /*
145  * Forget that we have a compiled pattern.
146  */
147 	public void
148 uncompile_pattern(void **pattern)
149 {
150 #if HAVE_GNU_REGEX
151 	struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern;
152 	if (*pcomp != NULL)
153 		regfree(*pcomp);
154 	*pcomp = NULL;
155 #endif
156 #if HAVE_POSIX_REGCOMP
157 	regex_t **pcomp = (regex_t **) pattern;
158 	if (*pcomp != NULL)
159 		regfree(*pcomp);
160 	*pcomp = NULL;
161 #endif
162 #if HAVE_PCRE
163 	pcre **pcomp = (pcre **) pattern;
164 	if (*pcomp != NULL)
165 		pcre_free(*pcomp);
166 	*pcomp = NULL;
167 #endif
168 #if HAVE_RE_COMP
169 	int *pcomp = (int *) pattern;
170 	*pcomp = 0;
171 #endif
172 #if HAVE_REGCMP
173 	char **pcomp = (char **) pattern;
174 	if (*pcomp != NULL)
175 		free(*pcomp);
176 	*pcomp = NULL;
177 #endif
178 #if HAVE_V8_REGCOMP
179 	struct regexp **pcomp = (struct regexp **) pattern;
180 	if (*pcomp != NULL)
181 		free(*pcomp);
182 	*pcomp = NULL;
183 #endif
184 }
185 
186 /*
187  * Can a pattern be successfully compiled?
188  */
189 	public int
190 valid_pattern(char *pattern)
191 {
192 	void *comp_pattern;
193 	int result;
194 
195 	CLEAR_PATTERN(comp_pattern);
196 	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
197 	if (result != 0)
198 		return (0);
199 	uncompile_pattern(&comp_pattern);
200 	return (1);
201 }
202 
203 /*
204  * Is a compiled pattern null?
205  */
206 	public int
207 is_null_pattern(void *pattern)
208 {
209 #if HAVE_GNU_REGEX
210 	return (pattern == NULL);
211 #endif
212 #if HAVE_POSIX_REGCOMP
213 	return (pattern == NULL);
214 #endif
215 #if HAVE_PCRE
216 	return (pattern == NULL);
217 #endif
218 #if HAVE_RE_COMP
219 	return (pattern == 0);
220 #endif
221 #if HAVE_REGCMP
222 	return (pattern == NULL);
223 #endif
224 #if HAVE_V8_REGCOMP
225 	return (pattern == NULL);
226 #endif
227 #if NO_REGEX
228 	return (pattern == NULL);
229 #endif
230 }
231 
232 /*
233  * Simple pattern matching function.
234  * It supports no metacharacters like *, etc.
235  */
236 	static int
237 match(char *pattern, int pattern_len, char *buf, int buf_len, char **pfound, char **pend)
238 {
239 	char *pp, *lp;
240 	char *pattern_end = pattern + pattern_len;
241 	char *buf_end = buf + buf_len;
242 
243 	for ( ;  buf < buf_end;  buf++)
244 	{
245 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
246 		{
247 			char cp = *pp;
248 			char cl = *lp;
249 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
250 				cp = ASCII_TO_LOWER(cp);
251 			if (cp != cl)
252 				break;
253 			if (pp == pattern_end || lp == buf_end)
254 				break;
255 		}
256 		if (pp == pattern_end)
257 		{
258 			if (pfound != NULL)
259 				*pfound = buf;
260 			if (pend != NULL)
261 				*pend = lp;
262 			return (1);
263 		}
264 	}
265 	return (0);
266 }
267 
268 /*
269  * Perform a pattern match with the previously compiled pattern.
270  * Set sp and ep to the start and end of the matched string.
271  */
272 	public int
273 match_pattern(void *pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int notbol, int search_type)
274 {
275 	int matched;
276 #if HAVE_GNU_REGEX
277 	struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern;
278 #endif
279 #if HAVE_POSIX_REGCOMP
280 	regex_t *spattern = (regex_t *) pattern;
281 #endif
282 #if HAVE_PCRE
283 	pcre *spattern = (pcre *) pattern;
284 #endif
285 #if HAVE_RE_COMP
286 	int spattern = (int) pattern;
287 #endif
288 #if HAVE_REGCMP
289 	char *spattern = (char *) pattern;
290 #endif
291 #if HAVE_V8_REGCOMP
292 	struct regexp *spattern = (struct regexp *) pattern;
293 #endif
294 
295 	*sp = *ep = NULL;
296 #if NO_REGEX
297 	search_type |= SRCH_NO_REGEX;
298 #endif
299 	if (search_type & SRCH_NO_REGEX)
300 		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
301 	else
302 	{
303 #if HAVE_GNU_REGEX
304 	{
305 		struct re_registers search_regs;
306 		spattern->not_bol = notbol;
307 		spattern->regs_allocated = REGS_UNALLOCATED;
308 		matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0;
309 		if (matched)
310 		{
311 			*sp = line + search_regs.start[0];
312 			*ep = line + search_regs.end[0];
313 		}
314 	}
315 #endif
316 #if HAVE_POSIX_REGCOMP
317 	{
318 		regmatch_t rm;
319 		int flags = (notbol) ? REG_NOTBOL : 0;
320 #ifdef REG_STARTEND
321 		flags |= REG_STARTEND;
322 		rm.rm_so = 0;
323 		rm.rm_eo = line_len;
324 #endif
325 		matched = !regexec(spattern, line, 1, &rm, flags);
326 		if (matched)
327 		{
328 #ifndef __WATCOMC__
329 			*sp = line + rm.rm_so;
330 			*ep = line + rm.rm_eo;
331 #else
332 			*sp = rm.rm_sp;
333 			*ep = rm.rm_ep;
334 #endif
335 		}
336 	}
337 #endif
338 #if HAVE_PCRE
339 	{
340 		int flags = (notbol) ? PCRE_NOTBOL : 0;
341 		int ovector[3];
342 		matched = pcre_exec(spattern, NULL, line, line_len,
343 			0, flags, ovector, 3) >= 0;
344 		if (matched)
345 		{
346 			*sp = line + ovector[0];
347 			*ep = line + ovector[1];
348 		}
349 	}
350 #endif
351 #if HAVE_RE_COMP
352 	matched = (re_exec(line) == 1);
353 	/*
354 	 * re_exec doesn't seem to provide a way to get the matched string.
355 	 */
356 	*sp = *ep = NULL;
357 #endif
358 #if HAVE_REGCMP
359 	*ep = regex(spattern, line);
360 	matched = (*ep != NULL);
361 	if (matched)
362 		*sp = __loc1;
363 #endif
364 #if HAVE_V8_REGCOMP
365 #if HAVE_REGEXEC2
366 	matched = regexec2(spattern, line, notbol);
367 #else
368 	matched = regexec(spattern, line);
369 #endif
370 	if (matched)
371 	{
372 		*sp = spattern->startp[0];
373 		*ep = spattern->endp[0];
374 	}
375 #endif
376 	}
377 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
378 			((search_type & SRCH_NO_MATCH) && !matched);
379 	return (matched);
380 }
381 
382