xref: /dragonfly/contrib/grep/src/kwsearch.c (revision 95b7b453)
1*95b7b453SJohn Marino /* kwsearch.c - searching subroutines using kwset for grep.
2*95b7b453SJohn Marino    Copyright 1992, 1998, 2000, 2007, 2009-2010 Free Software Foundation, Inc.
3*95b7b453SJohn Marino 
4*95b7b453SJohn Marino    This program is free software; you can redistribute it and/or modify
5*95b7b453SJohn Marino    it under the terms of the GNU General Public License as published by
6*95b7b453SJohn Marino    the Free Software Foundation; either version 3, or (at your option)
7*95b7b453SJohn Marino    any later version.
8*95b7b453SJohn Marino 
9*95b7b453SJohn Marino    This program is distributed in the hope that it will be useful,
10*95b7b453SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
11*95b7b453SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12*95b7b453SJohn Marino    GNU General Public License for more details.
13*95b7b453SJohn Marino 
14*95b7b453SJohn Marino    You should have received a copy of the GNU General Public License
15*95b7b453SJohn Marino    along with this program; if not, write to the Free Software
16*95b7b453SJohn Marino    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17*95b7b453SJohn Marino    02110-1301, USA.  */
18*95b7b453SJohn Marino 
19*95b7b453SJohn Marino /* Written August 1992 by Mike Haertel. */
20*95b7b453SJohn Marino 
21*95b7b453SJohn Marino #include <config.h>
22*95b7b453SJohn Marino #include "search.h"
23*95b7b453SJohn Marino 
/* Nonzero if C is a word constituent for -w: an underscore or an
   alphanumeric character.  C is evaluated more than once, and like
   any <ctype.h> argument it must already have an unsigned char value
   (callers cast before invoking the macro).  */
#define WCHAR(C) ((C) == '_' || isalnum (C))
26*95b7b453SJohn Marino 
/* Compiled keyword set used for fixed-string matching.  Fcompile
   initializes it and adds each newline-separated string of the
   pattern to it; Fexecute passes it to kwsexec to search a buffer.  */
static kwset_t kwset;
31*95b7b453SJohn Marino 
32*95b7b453SJohn Marino void
33*95b7b453SJohn Marino Fcompile (char const *pattern, size_t size)
34*95b7b453SJohn Marino {
35*95b7b453SJohn Marino   char const *beg, *end, *lim, *err, *pat;
36*95b7b453SJohn Marino   size_t psize;
37*95b7b453SJohn Marino 
38*95b7b453SJohn Marino   kwsinit (&kwset);
39*95b7b453SJohn Marino   psize = size;
40*95b7b453SJohn Marino #if MBS_SUPPORT
41*95b7b453SJohn Marino   if (match_icase && MB_CUR_MAX > 1)
42*95b7b453SJohn Marino     pat = mbtolower (pattern, &psize);
43*95b7b453SJohn Marino   else
44*95b7b453SJohn Marino #endif
45*95b7b453SJohn Marino     pat = pattern;
46*95b7b453SJohn Marino 
47*95b7b453SJohn Marino   beg = pat;
48*95b7b453SJohn Marino   do
49*95b7b453SJohn Marino     {
50*95b7b453SJohn Marino       for (lim = beg;; ++lim)
51*95b7b453SJohn Marino         {
52*95b7b453SJohn Marino           end = lim;
53*95b7b453SJohn Marino           if (lim >= pat + psize)
54*95b7b453SJohn Marino             break;
55*95b7b453SJohn Marino          if (*lim == '\n')
56*95b7b453SJohn Marino            {
57*95b7b453SJohn Marino              lim++;
58*95b7b453SJohn Marino              break;
59*95b7b453SJohn Marino            }
60*95b7b453SJohn Marino #if HAVE_DOS_FILE_CONTENTS
61*95b7b453SJohn Marino          if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n')
62*95b7b453SJohn Marino            {
63*95b7b453SJohn Marino              lim += 2;
64*95b7b453SJohn Marino              break;
65*95b7b453SJohn Marino            }
66*95b7b453SJohn Marino #endif
67*95b7b453SJohn Marino         }
68*95b7b453SJohn Marino 
69*95b7b453SJohn Marino       if ((err = kwsincr (kwset, beg, end - beg)) != NULL)
70*95b7b453SJohn Marino         error (EXIT_TROUBLE, 0, "%s", err);
71*95b7b453SJohn Marino       beg = lim;
72*95b7b453SJohn Marino     }
73*95b7b453SJohn Marino   while (beg < pat + psize);
74*95b7b453SJohn Marino 
75*95b7b453SJohn Marino   if ((err = kwsprep (kwset)) != NULL)
76*95b7b453SJohn Marino     error (EXIT_TROUBLE, 0, "%s", err);
77*95b7b453SJohn Marino }
78*95b7b453SJohn Marino 
79*95b7b453SJohn Marino size_t
80*95b7b453SJohn Marino Fexecute (char const *buf, size_t size, size_t *match_size,
81*95b7b453SJohn Marino           char const *start_ptr)
82*95b7b453SJohn Marino {
83*95b7b453SJohn Marino   char const *beg, *try, *end, *mb_start;
84*95b7b453SJohn Marino   size_t len;
85*95b7b453SJohn Marino   char eol = eolbyte;
86*95b7b453SJohn Marino   struct kwsmatch kwsmatch;
87*95b7b453SJohn Marino   size_t ret_val;
88*95b7b453SJohn Marino #if MBS_SUPPORT
89*95b7b453SJohn Marino   if (MB_CUR_MAX > 1)
90*95b7b453SJohn Marino     {
91*95b7b453SJohn Marino       if (match_icase)
92*95b7b453SJohn Marino         {
93*95b7b453SJohn Marino           char *case_buf = mbtolower (buf, &size);
94*95b7b453SJohn Marino           if (start_ptr)
95*95b7b453SJohn Marino             start_ptr = case_buf + (start_ptr - buf);
96*95b7b453SJohn Marino           buf = case_buf;
97*95b7b453SJohn Marino         }
98*95b7b453SJohn Marino     }
99*95b7b453SJohn Marino #endif /* MBS_SUPPORT */
100*95b7b453SJohn Marino 
101*95b7b453SJohn Marino   for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++)
102*95b7b453SJohn Marino     {
103*95b7b453SJohn Marino       size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
104*95b7b453SJohn Marino       if (offset == (size_t) -1)
105*95b7b453SJohn Marino         goto failure;
106*95b7b453SJohn Marino       len = kwsmatch.size[0];
107*95b7b453SJohn Marino #if MBS_SUPPORT
108*95b7b453SJohn Marino       if (MB_CUR_MAX > 1 && is_mb_middle (&mb_start, beg + offset, buf + size,
109*95b7b453SJohn Marino                                           len))
110*95b7b453SJohn Marino         {
111*95b7b453SJohn Marino           /* The match was a part of multibyte character, advance at least
112*95b7b453SJohn Marino              one byte to ensure no infinite loop happens.  */
113*95b7b453SJohn Marino           mbstate_t s;
114*95b7b453SJohn Marino           memset (&s, 0, sizeof s);
115*95b7b453SJohn Marino           size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s);
116*95b7b453SJohn Marino           if (mb_len == (size_t) -2)
117*95b7b453SJohn Marino             goto failure;
118*95b7b453SJohn Marino           beg = mb_start;
119*95b7b453SJohn Marino           if (mb_len != (size_t) -1)
120*95b7b453SJohn Marino             beg += mb_len - 1;
121*95b7b453SJohn Marino           continue;
122*95b7b453SJohn Marino         }
123*95b7b453SJohn Marino #endif /* MBS_SUPPORT */
124*95b7b453SJohn Marino       beg += offset;
125*95b7b453SJohn Marino       if (start_ptr && !match_words)
126*95b7b453SJohn Marino         goto success_in_beg_and_len;
127*95b7b453SJohn Marino       if (match_lines)
128*95b7b453SJohn Marino         {
129*95b7b453SJohn Marino           if (beg > buf && beg[-1] != eol)
130*95b7b453SJohn Marino             continue;
131*95b7b453SJohn Marino           if (beg + len < buf + size && beg[len] != eol)
132*95b7b453SJohn Marino             continue;
133*95b7b453SJohn Marino           goto success;
134*95b7b453SJohn Marino         }
135*95b7b453SJohn Marino       else if (match_words)
136*95b7b453SJohn Marino         for (try = beg; ; )
137*95b7b453SJohn Marino           {
138*95b7b453SJohn Marino             if (try > buf && WCHAR((unsigned char) try[-1]))
139*95b7b453SJohn Marino               break;
140*95b7b453SJohn Marino             if (try + len < buf + size && WCHAR((unsigned char) try[len]))
141*95b7b453SJohn Marino               {
142*95b7b453SJohn Marino                 if (!len)
143*95b7b453SJohn Marino                   break;
144*95b7b453SJohn Marino                 offset = kwsexec (kwset, beg, --len, &kwsmatch);
145*95b7b453SJohn Marino                 if (offset == (size_t) -1)
146*95b7b453SJohn Marino                   break;
147*95b7b453SJohn Marino                 try = beg + offset;
148*95b7b453SJohn Marino                 len = kwsmatch.size[0];
149*95b7b453SJohn Marino               }
150*95b7b453SJohn Marino             else if (!start_ptr)
151*95b7b453SJohn Marino               goto success;
152*95b7b453SJohn Marino             else
153*95b7b453SJohn Marino               goto success_in_beg_and_len;
154*95b7b453SJohn Marino           } /* for (try) */
155*95b7b453SJohn Marino       else
156*95b7b453SJohn Marino         goto success;
157*95b7b453SJohn Marino     } /* for (beg in buf) */
158*95b7b453SJohn Marino 
159*95b7b453SJohn Marino  failure:
160*95b7b453SJohn Marino   ret_val = -1;
161*95b7b453SJohn Marino   goto out;
162*95b7b453SJohn Marino 
163*95b7b453SJohn Marino  success:
164*95b7b453SJohn Marino   if ((end = memchr (beg + len, eol, (buf + size) - (beg + len))) != NULL)
165*95b7b453SJohn Marino     end++;
166*95b7b453SJohn Marino   else
167*95b7b453SJohn Marino     end = buf + size;
168*95b7b453SJohn Marino   while (buf < beg && beg[-1] != eol)
169*95b7b453SJohn Marino     --beg;
170*95b7b453SJohn Marino   len = end - beg;
171*95b7b453SJohn Marino  success_in_beg_and_len:
172*95b7b453SJohn Marino   *match_size = len;
173*95b7b453SJohn Marino   ret_val = beg - buf;
174*95b7b453SJohn Marino  out:
175*95b7b453SJohn Marino   return ret_val;
176*95b7b453SJohn Marino }
177