1*95b7b453SJohn Marino /* kwsearch.c - searching subroutines using kwset for grep. 2*95b7b453SJohn Marino Copyright 1992, 1998, 2000, 2007, 2009-2010 Free Software Foundation, Inc. 3*95b7b453SJohn Marino 4*95b7b453SJohn Marino This program is free software; you can redistribute it and/or modify 5*95b7b453SJohn Marino it under the terms of the GNU General Public License as published by 6*95b7b453SJohn Marino the Free Software Foundation; either version 3, or (at your option) 7*95b7b453SJohn Marino any later version. 8*95b7b453SJohn Marino 9*95b7b453SJohn Marino This program is distributed in the hope that it will be useful, 10*95b7b453SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of 11*95b7b453SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12*95b7b453SJohn Marino GNU General Public License for more details. 13*95b7b453SJohn Marino 14*95b7b453SJohn Marino You should have received a copy of the GNU General Public License 15*95b7b453SJohn Marino along with this program; if not, write to the Free Software 16*95b7b453SJohn Marino Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17*95b7b453SJohn Marino 02110-1301, USA. */ 18*95b7b453SJohn Marino 19*95b7b453SJohn Marino /* Written August 1992 by Mike Haertel. */ 20*95b7b453SJohn Marino 21*95b7b453SJohn Marino #include <config.h> 22*95b7b453SJohn Marino #include "search.h" 23*95b7b453SJohn Marino 24*95b7b453SJohn Marino /* For -w, we also consider _ to be word constituent. */ 25*95b7b453SJohn Marino #define WCHAR(C) (isalnum (C) || (C) == '_') 26*95b7b453SJohn Marino 27*95b7b453SJohn Marino /* KWset compiled pattern. For Ecompile and Gcompile, we compile 28*95b7b453SJohn Marino a list of strings, at least one of which is known to occur in 29*95b7b453SJohn Marino any string matching the regexp. */ 30*95b7b453SJohn Marino static kwset_t kwset; 31*95b7b453SJohn Marino 32*95b7b453SJohn Marino void 33*95b7b453SJohn Marino Fcompile (char const *pattern, size_t size) 34*95b7b453SJohn Marino { 35*95b7b453SJohn Marino char const *beg, *end, *lim, *err, *pat; 36*95b7b453SJohn Marino size_t psize; 37*95b7b453SJohn Marino 38*95b7b453SJohn Marino kwsinit (&kwset); 39*95b7b453SJohn Marino psize = size; 40*95b7b453SJohn Marino #if MBS_SUPPORT 41*95b7b453SJohn Marino if (match_icase && MB_CUR_MAX > 1) 42*95b7b453SJohn Marino pat = mbtolower (pattern, &psize); 43*95b7b453SJohn Marino else 44*95b7b453SJohn Marino #endif 45*95b7b453SJohn Marino pat = pattern; 46*95b7b453SJohn Marino 47*95b7b453SJohn Marino beg = pat; 48*95b7b453SJohn Marino do 49*95b7b453SJohn Marino { 50*95b7b453SJohn Marino for (lim = beg;; ++lim) 51*95b7b453SJohn Marino { 52*95b7b453SJohn Marino end = lim; 53*95b7b453SJohn Marino if (lim >= pat + psize) 54*95b7b453SJohn Marino break; 55*95b7b453SJohn Marino if (*lim == '\n') 56*95b7b453SJohn Marino { 57*95b7b453SJohn Marino lim++; 58*95b7b453SJohn Marino break; 59*95b7b453SJohn Marino } 60*95b7b453SJohn Marino #if HAVE_DOS_FILE_CONTENTS 61*95b7b453SJohn Marino if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n') 62*95b7b453SJohn Marino { 63*95b7b453SJohn Marino lim += 2; 64*95b7b453SJohn Marino break; 65*95b7b453SJohn Marino } 66*95b7b453SJohn Marino #endif 67*95b7b453SJohn Marino } 68*95b7b453SJohn Marino 69*95b7b453SJohn Marino if ((err = kwsincr (kwset, beg, end - beg)) != NULL) 70*95b7b453SJohn Marino error (EXIT_TROUBLE, 0, "%s", err); 71*95b7b453SJohn Marino beg = lim; 72*95b7b453SJohn Marino } 73*95b7b453SJohn Marino while (beg < pat + psize); 74*95b7b453SJohn Marino 75*95b7b453SJohn Marino if ((err = kwsprep (kwset)) != NULL) 76*95b7b453SJohn Marino error (EXIT_TROUBLE, 0, "%s", err); 77*95b7b453SJohn Marino } 78*95b7b453SJohn Marino 79*95b7b453SJohn Marino size_t 80*95b7b453SJohn Marino Fexecute (char const *buf, size_t size, size_t *match_size, 81*95b7b453SJohn Marino char const *start_ptr) 82*95b7b453SJohn Marino { 83*95b7b453SJohn Marino char const *beg, *try, *end, *mb_start; 84*95b7b453SJohn Marino size_t len; 85*95b7b453SJohn Marino char eol = eolbyte; 86*95b7b453SJohn Marino struct kwsmatch kwsmatch; 87*95b7b453SJohn Marino size_t ret_val; 88*95b7b453SJohn Marino #if MBS_SUPPORT 89*95b7b453SJohn Marino if (MB_CUR_MAX > 1) 90*95b7b453SJohn Marino { 91*95b7b453SJohn Marino if (match_icase) 92*95b7b453SJohn Marino { 93*95b7b453SJohn Marino char *case_buf = mbtolower (buf, &size); 94*95b7b453SJohn Marino if (start_ptr) 95*95b7b453SJohn Marino start_ptr = case_buf + (start_ptr - buf); 96*95b7b453SJohn Marino buf = case_buf; 97*95b7b453SJohn Marino } 98*95b7b453SJohn Marino } 99*95b7b453SJohn Marino #endif /* MBS_SUPPORT */ 100*95b7b453SJohn Marino 101*95b7b453SJohn Marino for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) 102*95b7b453SJohn Marino { 103*95b7b453SJohn Marino size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); 104*95b7b453SJohn Marino if (offset == (size_t) -1) 105*95b7b453SJohn Marino goto failure; 106*95b7b453SJohn Marino len = kwsmatch.size[0]; 107*95b7b453SJohn Marino #if MBS_SUPPORT 108*95b7b453SJohn Marino if (MB_CUR_MAX > 1 && is_mb_middle (&mb_start, beg + offset, buf + size, 109*95b7b453SJohn Marino len)) 110*95b7b453SJohn Marino { 111*95b7b453SJohn Marino /* The match was a part of multibyte character, advance at least 112*95b7b453SJohn Marino one byte to ensure no infinite loop happens. */ 113*95b7b453SJohn Marino mbstate_t s; 114*95b7b453SJohn Marino memset (&s, 0, sizeof s); 115*95b7b453SJohn Marino size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s); 116*95b7b453SJohn Marino if (mb_len == (size_t) -2) 117*95b7b453SJohn Marino goto failure; 118*95b7b453SJohn Marino beg = mb_start; 119*95b7b453SJohn Marino if (mb_len != (size_t) -1) 120*95b7b453SJohn Marino beg += mb_len - 1; 121*95b7b453SJohn Marino continue; 122*95b7b453SJohn Marino } 123*95b7b453SJohn Marino #endif /* MBS_SUPPORT */ 124*95b7b453SJohn Marino beg += offset; 125*95b7b453SJohn Marino if (start_ptr && !match_words) 126*95b7b453SJohn Marino goto success_in_beg_and_len; 127*95b7b453SJohn Marino if (match_lines) 128*95b7b453SJohn Marino { 129*95b7b453SJohn Marino if (beg > buf && beg[-1] != eol) 130*95b7b453SJohn Marino continue; 131*95b7b453SJohn Marino if (beg + len < buf + size && beg[len] != eol) 132*95b7b453SJohn Marino continue; 133*95b7b453SJohn Marino goto success; 134*95b7b453SJohn Marino } 135*95b7b453SJohn Marino else if (match_words) 136*95b7b453SJohn Marino for (try = beg; ; ) 137*95b7b453SJohn Marino { 138*95b7b453SJohn Marino if (try > buf && WCHAR((unsigned char) try[-1])) 139*95b7b453SJohn Marino break; 140*95b7b453SJohn Marino if (try + len < buf + size && WCHAR((unsigned char) try[len])) 141*95b7b453SJohn Marino { 142*95b7b453SJohn Marino if (!len) 143*95b7b453SJohn Marino break; 144*95b7b453SJohn Marino offset = kwsexec (kwset, beg, --len, &kwsmatch); 145*95b7b453SJohn Marino if (offset == (size_t) -1) 146*95b7b453SJohn Marino break; 147*95b7b453SJohn Marino try = beg + offset; 148*95b7b453SJohn Marino len = kwsmatch.size[0]; 149*95b7b453SJohn Marino } 150*95b7b453SJohn Marino else if (!start_ptr) 151*95b7b453SJohn Marino goto success; 152*95b7b453SJohn Marino else 153*95b7b453SJohn Marino goto success_in_beg_and_len; 154*95b7b453SJohn Marino } /* for (try) */ 155*95b7b453SJohn Marino else 156*95b7b453SJohn Marino goto success; 157*95b7b453SJohn Marino } /* for (beg in buf) */ 158*95b7b453SJohn Marino 159*95b7b453SJohn Marino failure: 160*95b7b453SJohn Marino ret_val = -1; 161*95b7b453SJohn Marino goto out; 162*95b7b453SJohn Marino 163*95b7b453SJohn Marino success: 164*95b7b453SJohn Marino if ((end = memchr (beg + len, eol, (buf + size) - (beg + len))) != NULL) 165*95b7b453SJohn Marino end++; 166*95b7b453SJohn Marino else 167*95b7b453SJohn Marino end = buf + size; 168*95b7b453SJohn Marino while (buf < beg && beg[-1] != eol) 169*95b7b453SJohn Marino --beg; 170*95b7b453SJohn Marino len = end - beg; 171*95b7b453SJohn Marino success_in_beg_and_len: 172*95b7b453SJohn Marino *match_size = len; 173*95b7b453SJohn Marino ret_val = beg - buf; 174*95b7b453SJohn Marino out: 175*95b7b453SJohn Marino return ret_val; 176*95b7b453SJohn Marino } 177