1 /* kwsearch.c - searching subroutines using kwset for grep. 2 Copyright 1992, 1998, 2000, 2007, 2009-2012 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 /* Written August 1992 by Mike Haertel. */ 20 21 #include <config.h> 22 #include "search.h" 23 24 /* For -w, we also consider _ to be word constituent. */ 25 #define WCHAR(C) (isalnum (C) || (C) == '_') 26 27 /* KWset compiled pattern. For Ecompile and Gcompile, we compile 28 a list of strings, at least one of which is known to occur in 29 any string matching the regexp. */ 30 static kwset_t kwset; 31 32 void 33 Fcompile (char const *pattern, size_t size) 34 { 35 char const *err; 36 size_t psize = size; 37 char const *pat = (match_icase && MB_CUR_MAX > 1 38 ? mbtolower (pattern, &psize) 39 : pattern); 40 41 kwsinit (&kwset); 42 43 char const *beg = pat; 44 do 45 { 46 char const *lim; 47 char const *end; 48 for (lim = beg;; ++lim) 49 { 50 end = lim; 51 if (lim >= pat + psize) 52 break; 53 if (*lim == '\n') 54 { 55 lim++; 56 break; 57 } 58 #if HAVE_DOS_FILE_CONTENTS 59 if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n') 60 { 61 lim += 2; 62 break; 63 } 64 #endif 65 } 66 67 if ((err = kwsincr (kwset, beg, end - beg)) != NULL) 68 error (EXIT_TROUBLE, 0, "%s", err); 69 beg = lim; 70 } 71 while (beg < pat + psize); 72 73 if ((err = kwsprep (kwset)) != NULL) 74 error (EXIT_TROUBLE, 0, "%s", err); 75 } 76 77 size_t 78 Fexecute (char const *buf, size_t size, size_t *match_size, 79 char const *start_ptr) 80 { 81 char const *beg, *try, *end, *mb_start; 82 size_t len; 83 char eol = eolbyte; 84 struct kwsmatch kwsmatch; 85 size_t ret_val; 86 if (MB_CUR_MAX > 1) 87 { 88 if (match_icase) 89 { 90 char *case_buf = mbtolower (buf, &size); 91 if (start_ptr) 92 start_ptr = case_buf + (start_ptr - buf); 93 buf = case_buf; 94 } 95 } 96 97 for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) 98 { 99 size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); 100 if (offset == (size_t) -1) 101 goto failure; 102 len = kwsmatch.size[0]; 103 if (MB_CUR_MAX > 1 104 && is_mb_middle (&mb_start, beg + offset, buf + size, len)) 105 { 106 /* The match was a part of multibyte character, advance at least 107 one byte to ensure no infinite loop happens. */ 108 mbstate_t s; 109 memset (&s, 0, sizeof s); 110 size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s); 111 if (mb_len == (size_t) -2) 112 goto failure; 113 beg = mb_start; 114 if (mb_len != (size_t) -1) 115 beg += mb_len - 1; 116 continue; 117 } 118 beg += offset; 119 if (start_ptr && !match_words) 120 goto success_in_beg_and_len; 121 if (match_lines) 122 { 123 if (beg > buf && beg[-1] != eol) 124 continue; 125 if (beg + len < buf + size && beg[len] != eol) 126 continue; 127 goto success; 128 } 129 else if (match_words) 130 for (try = beg; ; ) 131 { 132 if (try > buf && WCHAR((unsigned char) try[-1])) 133 break; 134 if (try + len < buf + size && WCHAR((unsigned char) try[len])) 135 { 136 if (!len) 137 break; 138 offset = kwsexec (kwset, beg, --len, &kwsmatch); 139 if (offset == (size_t) -1) 140 break; 141 try = beg + offset; 142 len = kwsmatch.size[0]; 143 } 144 else if (!start_ptr) 145 goto success; 146 else 147 goto success_in_beg_and_len; 148 } /* for (try) */ 149 else 150 goto success; 151 } /* for (beg in buf) */ 152 153 failure: 154 ret_val = -1; 155 goto out; 156 157 success: 158 if ((end = memchr (beg + len, eol, (buf + size) - (beg + len))) != NULL) 159 end++; 160 else 161 end = buf + size; 162 while (buf < beg && beg[-1] != eol) 163 --beg; 164 len = end - beg; 165 success_in_beg_and_len: 166 *match_size = len; 167 ret_val = beg - buf; 168 out: 169 return ret_val; 170 } 171