/* $NetBSD: pattern.c,v 1.3 2013/09/04 19:44:21 tron Exp $ */

/*
 * Copyright (C) 1984-2012 Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information, see the README file.
 */

/*
 * Routines to do pattern matching.
 */
15bd4fcc09Stron
16bd4fcc09Stron #include "less.h"
17bd4fcc09Stron #include "pattern.h"
18bd4fcc09Stron
19bd4fcc09Stron extern int caseless;
20bd4fcc09Stron
21bd4fcc09Stron /*
22bd4fcc09Stron * Compile a search pattern, for future use by match_pattern.
23bd4fcc09Stron */
24bd4fcc09Stron static int
compile_pattern2(pattern,search_type,comp_pattern)25bd4fcc09Stron compile_pattern2(pattern, search_type, comp_pattern)
26bd4fcc09Stron char *pattern;
27bd4fcc09Stron int search_type;
28bd4fcc09Stron void **comp_pattern;
29bd4fcc09Stron {
30*29ea9d98Stron if (search_type & SRCH_NO_REGEX)
31*29ea9d98Stron return (0);
32bd4fcc09Stron {
33*29ea9d98Stron #if HAVE_GNU_REGEX
34*29ea9d98Stron struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
35*29ea9d98Stron ecalloc(1, sizeof(struct re_pattern_buffer));
36*29ea9d98Stron struct re_pattern_buffer **pcomp =
37*29ea9d98Stron (struct re_pattern_buffer **) comp_pattern;
38*29ea9d98Stron re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
39*29ea9d98Stron if (re_compile_pattern(pattern, strlen(pattern), comp))
40*29ea9d98Stron {
41*29ea9d98Stron free(comp);
42*29ea9d98Stron error("Invalid pattern", NULL_PARG);
43*29ea9d98Stron return (-1);
44*29ea9d98Stron }
45*29ea9d98Stron if (*pcomp != NULL)
46*29ea9d98Stron regfree(*pcomp);
47*29ea9d98Stron *pcomp = comp;
48*29ea9d98Stron #endif
49bd4fcc09Stron #if HAVE_POSIX_REGCOMP
50bd4fcc09Stron regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
51bd4fcc09Stron regex_t **pcomp = (regex_t **) comp_pattern;
52bd4fcc09Stron if (regcomp(comp, pattern, REGCOMP_FLAG))
53bd4fcc09Stron {
54bd4fcc09Stron free(comp);
55bd4fcc09Stron error("Invalid pattern", NULL_PARG);
56bd4fcc09Stron return (-1);
57bd4fcc09Stron }
58bd4fcc09Stron if (*pcomp != NULL)
59bd4fcc09Stron regfree(*pcomp);
60bd4fcc09Stron *pcomp = comp;
61bd4fcc09Stron #endif
62bd4fcc09Stron #if HAVE_PCRE
63bd4fcc09Stron pcre *comp;
64bd4fcc09Stron pcre **pcomp = (pcre **) comp_pattern;
65*29ea9d98Stron constant char *errstring;
66bd4fcc09Stron int erroffset;
67bd4fcc09Stron PARG parg;
68bd4fcc09Stron comp = pcre_compile(pattern, 0,
69bd4fcc09Stron &errstring, &erroffset, NULL);
70bd4fcc09Stron if (comp == NULL)
71bd4fcc09Stron {
72bd4fcc09Stron parg.p_string = (char *) errstring;
73bd4fcc09Stron error("%s", &parg);
74bd4fcc09Stron return (-1);
75bd4fcc09Stron }
76bd4fcc09Stron *pcomp = comp;
77bd4fcc09Stron #endif
78bd4fcc09Stron #if HAVE_RE_COMP
79bd4fcc09Stron PARG parg;
80bd4fcc09Stron int *pcomp = (int *) comp_pattern;
81bd4fcc09Stron if ((parg.p_string = re_comp(pattern)) != NULL)
82bd4fcc09Stron {
83bd4fcc09Stron error("%s", &parg);
84bd4fcc09Stron return (-1);
85bd4fcc09Stron }
86bd4fcc09Stron *pcomp = 1;
87bd4fcc09Stron #endif
88bd4fcc09Stron #if HAVE_REGCMP
89bd4fcc09Stron char *comp;
90bd4fcc09Stron char **pcomp = (char **) comp_pattern;
91bd4fcc09Stron if ((comp = regcmp(pattern, 0)) == NULL)
92bd4fcc09Stron {
93bd4fcc09Stron error("Invalid pattern", NULL_PARG);
94bd4fcc09Stron return (-1);
95bd4fcc09Stron }
96bd4fcc09Stron if (pcomp != NULL)
97bd4fcc09Stron free(*pcomp);
98bd4fcc09Stron *pcomp = comp;
99bd4fcc09Stron #endif
100bd4fcc09Stron #if HAVE_V8_REGCOMP
101bd4fcc09Stron struct regexp *comp;
102bd4fcc09Stron struct regexp **pcomp = (struct regexp **) comp_pattern;
103bd4fcc09Stron if ((comp = regcomp(pattern)) == NULL)
104bd4fcc09Stron {
105bd4fcc09Stron /*
106bd4fcc09Stron * regcomp has already printed an error message
107bd4fcc09Stron * via regerror().
108bd4fcc09Stron */
109bd4fcc09Stron return (-1);
110bd4fcc09Stron }
111bd4fcc09Stron if (*pcomp != NULL)
112bd4fcc09Stron free(*pcomp);
113bd4fcc09Stron *pcomp = comp;
114bd4fcc09Stron #endif
115bd4fcc09Stron }
116bd4fcc09Stron return (0);
117bd4fcc09Stron }
118bd4fcc09Stron
119bd4fcc09Stron /*
120bd4fcc09Stron * Like compile_pattern2, but convert the pattern to lowercase if necessary.
121bd4fcc09Stron */
122bd4fcc09Stron public int
compile_pattern(pattern,search_type,comp_pattern)123bd4fcc09Stron compile_pattern(pattern, search_type, comp_pattern)
124bd4fcc09Stron char *pattern;
125bd4fcc09Stron int search_type;
126bd4fcc09Stron void **comp_pattern;
127bd4fcc09Stron {
128bd4fcc09Stron char *cvt_pattern;
129bd4fcc09Stron int result;
130bd4fcc09Stron
131bd4fcc09Stron if (caseless != OPT_ONPLUS)
132bd4fcc09Stron cvt_pattern = pattern;
133bd4fcc09Stron else
134bd4fcc09Stron {
135bd4fcc09Stron cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
136bd4fcc09Stron cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
137bd4fcc09Stron }
138bd4fcc09Stron result = compile_pattern2(cvt_pattern, search_type, comp_pattern);
139bd4fcc09Stron if (cvt_pattern != pattern)
140bd4fcc09Stron free(cvt_pattern);
141bd4fcc09Stron return (result);
142bd4fcc09Stron }
143bd4fcc09Stron
144bd4fcc09Stron /*
145bd4fcc09Stron * Forget that we have a compiled pattern.
146bd4fcc09Stron */
147bd4fcc09Stron public void
uncompile_pattern(pattern)148bd4fcc09Stron uncompile_pattern(pattern)
149bd4fcc09Stron void **pattern;
150bd4fcc09Stron {
151*29ea9d98Stron #if HAVE_GNU_REGEX
152*29ea9d98Stron struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern;
153*29ea9d98Stron if (*pcomp != NULL)
154*29ea9d98Stron regfree(*pcomp);
155*29ea9d98Stron *pcomp = NULL;
156*29ea9d98Stron #endif
157bd4fcc09Stron #if HAVE_POSIX_REGCOMP
158bd4fcc09Stron regex_t **pcomp = (regex_t **) pattern;
159bd4fcc09Stron if (*pcomp != NULL)
160bd4fcc09Stron regfree(*pcomp);
161bd4fcc09Stron *pcomp = NULL;
162bd4fcc09Stron #endif
163bd4fcc09Stron #if HAVE_PCRE
164bd4fcc09Stron pcre **pcomp = (pcre **) pattern;
165bd4fcc09Stron if (*pcomp != NULL)
166bd4fcc09Stron pcre_free(*pcomp);
167bd4fcc09Stron *pcomp = NULL;
168bd4fcc09Stron #endif
169bd4fcc09Stron #if HAVE_RE_COMP
170bd4fcc09Stron int *pcomp = (int *) pattern;
171bd4fcc09Stron *pcomp = 0;
172bd4fcc09Stron #endif
173bd4fcc09Stron #if HAVE_REGCMP
174bd4fcc09Stron char **pcomp = (char **) pattern;
175bd4fcc09Stron if (*pcomp != NULL)
176bd4fcc09Stron free(*pcomp);
177bd4fcc09Stron *pcomp = NULL;
178bd4fcc09Stron #endif
179bd4fcc09Stron #if HAVE_V8_REGCOMP
180bd4fcc09Stron struct regexp **pcomp = (struct regexp **) pattern;
181bd4fcc09Stron if (*pcomp != NULL)
182bd4fcc09Stron free(*pcomp);
183bd4fcc09Stron *pcomp = NULL;
184bd4fcc09Stron #endif
185bd4fcc09Stron }
186bd4fcc09Stron
187bd4fcc09Stron /*
188bd4fcc09Stron * Is a compiled pattern null?
189bd4fcc09Stron */
190bd4fcc09Stron public int
is_null_pattern(pattern)191bd4fcc09Stron is_null_pattern(pattern)
192bd4fcc09Stron void *pattern;
193bd4fcc09Stron {
194*29ea9d98Stron #if HAVE_GNU_REGEX
195*29ea9d98Stron return (pattern == NULL);
196*29ea9d98Stron #endif
197bd4fcc09Stron #if HAVE_POSIX_REGCOMP
198bd4fcc09Stron return (pattern == NULL);
199bd4fcc09Stron #endif
200bd4fcc09Stron #if HAVE_PCRE
201bd4fcc09Stron return (pattern == NULL);
202bd4fcc09Stron #endif
203bd4fcc09Stron #if HAVE_RE_COMP
204bd4fcc09Stron return (pattern == 0);
205bd4fcc09Stron #endif
206bd4fcc09Stron #if HAVE_REGCMP
207bd4fcc09Stron return (pattern == NULL);
208bd4fcc09Stron #endif
209bd4fcc09Stron #if HAVE_V8_REGCOMP
210bd4fcc09Stron return (pattern == NULL);
211bd4fcc09Stron #endif
212bd4fcc09Stron }
213bd4fcc09Stron
/*
 * Simple pattern matching function.
 * It supports no metacharacters like *, etc.
 *
 *	pattern, pattern_len	Bytes to look for.
 *	buf, buf_len		Bytes to search in.
 *	pfound			If non-NULL, receives the start of the match.
 *	pend			If non-NULL, receives the position just
 *				past the end of the match.
 *
 * Returns 1 if the pattern occurs in the buffer and 0 otherwise.
 * An empty buffer never matches, not even an empty pattern.
 * Neither pattern[pattern_len] nor buf[buf_len] is ever read, so the
 * inputs need not be NUL-terminated.
 */
static int
match(pattern, pattern_len, buf, buf_len, pfound, pend)
	char *pattern;
	int pattern_len;
	char *buf;
	int buf_len;
	char **pfound, **pend;
{
	char *pattern_end = pattern + pattern_len;
	char *buf_end = buf + buf_len;
	char *pp, *lp;

	for ( ;  buf < buf_end;  buf++)
	{
		/*
		 * Compare at this offset.  Both bounds are checked before
		 * either pointer is dereferenced.
		 */
		pp = pattern;
		lp = buf;
		while (pp < pattern_end && lp < buf_end && *pp == *lp)
		{
			pp++;
			lp++;
		}
		if (pp == pattern_end)
		{
			/* The whole pattern was consumed: a match. */
			if (pfound != NULL)
				*pfound = buf;
			if (pend != NULL)
				*pend = lp;
			return (1);
		}
	}
	return (0);
}
246bd4fcc09Stron
247bd4fcc09Stron /*
248bd4fcc09Stron * Perform a pattern match with the previously compiled pattern.
249bd4fcc09Stron * Set sp and ep to the start and end of the matched string.
250bd4fcc09Stron */
251bd4fcc09Stron public int
match_pattern(pattern,tpattern,line,line_len,sp,ep,notbol,search_type)252bd4fcc09Stron match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
253bd4fcc09Stron void *pattern;
254bd4fcc09Stron char *tpattern;
255bd4fcc09Stron char *line;
256bd4fcc09Stron int line_len;
257bd4fcc09Stron char **sp;
258bd4fcc09Stron char **ep;
259bd4fcc09Stron int notbol;
260bd4fcc09Stron int search_type;
261bd4fcc09Stron {
262bd4fcc09Stron int matched;
263*29ea9d98Stron #if HAVE_GNU_REGEX
264*29ea9d98Stron struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern;
265*29ea9d98Stron #endif
266bd4fcc09Stron #if HAVE_POSIX_REGCOMP
267bd4fcc09Stron regex_t *spattern = (regex_t *) pattern;
268bd4fcc09Stron #endif
269bd4fcc09Stron #if HAVE_PCRE
270bd4fcc09Stron pcre *spattern = (pcre *) pattern;
271bd4fcc09Stron #endif
272bd4fcc09Stron #if HAVE_RE_COMP
273bd4fcc09Stron int spattern = (int) pattern;
274bd4fcc09Stron #endif
275bd4fcc09Stron #if HAVE_REGCMP
276bd4fcc09Stron char *spattern = (char *) pattern;
277bd4fcc09Stron #endif
278bd4fcc09Stron #if HAVE_V8_REGCOMP
279bd4fcc09Stron struct regexp *spattern = (struct regexp *) pattern;
280bd4fcc09Stron #endif
281bd4fcc09Stron
282*29ea9d98Stron #if NO_REGEX
283*29ea9d98Stron search_type |= SRCH_NO_REGEX;
284*29ea9d98Stron #endif
285bd4fcc09Stron if (search_type & SRCH_NO_REGEX)
286bd4fcc09Stron matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
287bd4fcc09Stron else
288bd4fcc09Stron {
289*29ea9d98Stron #if HAVE_GNU_REGEX
290*29ea9d98Stron {
291*29ea9d98Stron struct re_registers search_regs;
292*29ea9d98Stron regoff_t *starts = (regoff_t *) ecalloc(1, sizeof (regoff_t));
293*29ea9d98Stron regoff_t *ends = (regoff_t *) ecalloc(1, sizeof (regoff_t));
294*29ea9d98Stron spattern->not_bol = notbol;
295*29ea9d98Stron re_set_registers(spattern, &search_regs, 1, starts, ends);
296*29ea9d98Stron matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0;
297*29ea9d98Stron if (matched)
298*29ea9d98Stron {
299*29ea9d98Stron *sp = line + search_regs.start[0];
300*29ea9d98Stron *ep = line + search_regs.end[0];
301*29ea9d98Stron }
302*29ea9d98Stron free(starts);
303*29ea9d98Stron free(ends);
304*29ea9d98Stron }
305*29ea9d98Stron #endif
306bd4fcc09Stron #if HAVE_POSIX_REGCOMP
307bd4fcc09Stron {
308bd4fcc09Stron regmatch_t rm;
309bd4fcc09Stron int flags = (notbol) ? REG_NOTBOL : 0;
310bd4fcc09Stron matched = !regexec(spattern, line, 1, &rm, flags);
311bd4fcc09Stron if (matched)
312bd4fcc09Stron {
313bd4fcc09Stron #ifndef __WATCOMC__
314bd4fcc09Stron *sp = line + rm.rm_so;
315bd4fcc09Stron *ep = line + rm.rm_eo;
316bd4fcc09Stron #else
317bd4fcc09Stron *sp = rm.rm_sp;
318bd4fcc09Stron *ep = rm.rm_ep;
319bd4fcc09Stron #endif
320bd4fcc09Stron }
321bd4fcc09Stron }
322bd4fcc09Stron #endif
323bd4fcc09Stron #if HAVE_PCRE
324bd4fcc09Stron {
325bd4fcc09Stron int flags = (notbol) ? PCRE_NOTBOL : 0;
326bd4fcc09Stron int ovector[3];
327bd4fcc09Stron matched = pcre_exec(spattern, NULL, line, line_len,
328bd4fcc09Stron 0, flags, ovector, 3) >= 0;
329bd4fcc09Stron if (matched)
330bd4fcc09Stron {
331bd4fcc09Stron *sp = line + ovector[0];
332bd4fcc09Stron *ep = line + ovector[1];
333bd4fcc09Stron }
334bd4fcc09Stron }
335bd4fcc09Stron #endif
336bd4fcc09Stron #if HAVE_RE_COMP
337bd4fcc09Stron matched = (re_exec(line) == 1);
338bd4fcc09Stron /*
339bd4fcc09Stron * re_exec doesn't seem to provide a way to get the matched string.
340bd4fcc09Stron */
341bd4fcc09Stron *sp = *ep = NULL;
342bd4fcc09Stron #endif
343bd4fcc09Stron #if HAVE_REGCMP
344bd4fcc09Stron *ep = regex(spattern, line);
345bd4fcc09Stron matched = (*ep != NULL);
346bd4fcc09Stron if (matched)
347bd4fcc09Stron *sp = __loc1;
348bd4fcc09Stron #endif
349bd4fcc09Stron #if HAVE_V8_REGCOMP
350bd4fcc09Stron #if HAVE_REGEXEC2
351bd4fcc09Stron matched = regexec2(spattern, line, notbol);
352bd4fcc09Stron #else
353bd4fcc09Stron matched = regexec(spattern, line);
354bd4fcc09Stron #endif
355bd4fcc09Stron if (matched)
356bd4fcc09Stron {
357bd4fcc09Stron *sp = spattern->startp[0];
358bd4fcc09Stron *ep = spattern->endp[0];
359bd4fcc09Stron }
360bd4fcc09Stron #endif
361bd4fcc09Stron }
362bd4fcc09Stron matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
363bd4fcc09Stron ((search_type & SRCH_NO_MATCH) && !matched);
364bd4fcc09Stron return (matched);
365bd4fcc09Stron }
366bd4fcc09Stron
367