1 /* kwsearch.c - searching subroutines using kwset for grep. 2 Copyright 1992, 1998, 2000, 2007, 2009-2011 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 /* Written August 1992 by Mike Haertel. */ 20 21 #include <config.h> 22 #include "search.h" 23 24 /* For -w, we also consider _ to be word constituent. */ 25 #define WCHAR(C) (isalnum (C) || (C) == '_') 26 27 /* KWset compiled pattern. For Ecompile and Gcompile, we compile 28 a list of strings, at least one of which is known to occur in 29 any string matching the regexp. */ 30 static kwset_t kwset; 31 32 void 33 Fcompile (char const *pattern, size_t size) 34 { 35 char const *beg, *end, *lim, *err, *pat; 36 size_t psize; 37 38 kwsinit (&kwset); 39 psize = size; 40 #if MBS_SUPPORT 41 if (match_icase && MB_CUR_MAX > 1) 42 pat = mbtolower (pattern, &psize); 43 else 44 #endif 45 pat = pattern; 46 47 beg = pat; 48 do 49 { 50 for (lim = beg;; ++lim) 51 { 52 end = lim; 53 if (lim >= pat + psize) 54 break; 55 if (*lim == '\n') 56 { 57 lim++; 58 break; 59 } 60 #if HAVE_DOS_FILE_CONTENTS 61 if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n') 62 { 63 lim += 2; 64 break; 65 } 66 #endif 67 } 68 69 if ((err = kwsincr (kwset, beg, end - beg)) != NULL) 70 error (EXIT_TROUBLE, 0, "%s", err); 71 beg = lim; 72 } 73 while (beg < pat + psize); 74 75 if ((err = kwsprep (kwset)) != NULL) 76 error (EXIT_TROUBLE, 0, "%s", err); 77 } 78 79 size_t 80 Fexecute (char const *buf, size_t size, size_t *match_size, 81 char const *start_ptr) 82 { 83 char const *beg, *try, *end, *mb_start; 84 size_t len; 85 char eol = eolbyte; 86 struct kwsmatch kwsmatch; 87 size_t ret_val; 88 #if MBS_SUPPORT 89 if (MB_CUR_MAX > 1) 90 { 91 if (match_icase) 92 { 93 char *case_buf = mbtolower (buf, &size); 94 if (start_ptr) 95 start_ptr = case_buf + (start_ptr - buf); 96 buf = case_buf; 97 } 98 } 99 #endif /* MBS_SUPPORT */ 100 101 for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) 102 { 103 size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); 104 if (offset == (size_t) -1) 105 goto failure; 106 len = kwsmatch.size[0]; 107 #if MBS_SUPPORT 108 if (MB_CUR_MAX > 1 && is_mb_middle (&mb_start, beg + offset, buf + size, 109 len)) 110 { 111 /* The match was a part of multibyte character, advance at least 112 one byte to ensure no infinite loop happens. */ 113 mbstate_t s; 114 memset (&s, 0, sizeof s); 115 size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s); 116 if (mb_len == (size_t) -2) 117 goto failure; 118 beg = mb_start; 119 if (mb_len != (size_t) -1) 120 beg += mb_len - 1; 121 continue; 122 } 123 #endif /* MBS_SUPPORT */ 124 beg += offset; 125 if (start_ptr && !match_words) 126 goto success_in_beg_and_len; 127 if (match_lines) 128 { 129 if (beg > buf && beg[-1] != eol) 130 continue; 131 if (beg + len < buf + size && beg[len] != eol) 132 continue; 133 goto success; 134 } 135 else if (match_words) 136 for (try = beg; ; ) 137 { 138 if (try > buf && WCHAR((unsigned char) try[-1])) 139 break; 140 if (try + len < buf + size && WCHAR((unsigned char) try[len])) 141 { 142 if (!len) 143 break; 144 offset = kwsexec (kwset, beg, --len, &kwsmatch); 145 if (offset == (size_t) -1) 146 break; 147 try = beg + offset; 148 len = kwsmatch.size[0]; 149 } 150 else if (!start_ptr) 151 goto success; 152 else 153 goto success_in_beg_and_len; 154 } /* for (try) */ 155 else 156 goto success; 157 } /* for (beg in buf) */ 158 159 failure: 160 ret_val = -1; 161 goto out; 162 163 success: 164 if ((end = memchr (beg + len, eol, (buf + size) - (beg + len))) != NULL) 165 end++; 166 else 167 end = buf + size; 168 while (buf < beg && beg[-1] != eol) 169 --beg; 170 len = end - beg; 171 success_in_beg_and_len: 172 *match_size = len; 173 ret_val = beg - buf; 174 out: 175 return ret_val; 176 } 177