1 /* kwsearch.c - searching subroutines using kwset for grep. 2 Copyright 1992, 1998, 2000, 2007, 2009-2012 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 /* Written August 1992 by Mike Haertel. */ 20 21 #include <config.h> 22 #include "search.h" 23 24 /* For -w, we also consider _ to be word constituent. */ 25 #define WCHAR(C) (isalnum (C) || (C) == '_') 26 27 /* KWset compiled pattern. For Ecompile and Gcompile, we compile 28 a list of strings, at least one of which is known to occur in 29 any string matching the regexp. */ 30 static kwset_t kwset; 31 32 void 33 Fcompile (char const *pattern, size_t size) 34 { 35 char const *err; 36 size_t psize = size; 37 mb_len_map_t *map = NULL; 38 char const *pat = (match_icase && MB_CUR_MAX > 1 39 ? mbtolower (pattern, &psize, &map) 40 : pattern); 41 42 kwsinit (&kwset); 43 44 char const *beg = pat; 45 do 46 { 47 char const *lim; 48 char const *end; 49 for (lim = beg;; ++lim) 50 { 51 end = lim; 52 if (lim >= pat + psize) 53 break; 54 if (*lim == '\n') 55 { 56 lim++; 57 break; 58 } 59 #if HAVE_DOS_FILE_CONTENTS 60 if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n') 61 { 62 lim += 2; 63 break; 64 } 65 #endif 66 } 67 68 if ((err = kwsincr (kwset, beg, end - beg)) != NULL) 69 error (EXIT_TROUBLE, 0, "%s", err); 70 beg = lim; 71 } 72 while (beg < pat + psize); 73 74 if ((err = kwsprep (kwset)) != NULL) 75 error (EXIT_TROUBLE, 0, "%s", err); 76 } 77 78 size_t 79 Fexecute (char const *buf, size_t size, size_t *match_size, 80 char const *start_ptr) 81 { 82 char const *beg, *try, *end, *mb_start; 83 size_t len; 84 char eol = eolbyte; 85 struct kwsmatch kwsmatch; 86 size_t ret_val; 87 mb_len_map_t *map = NULL; 88 89 if (MB_CUR_MAX > 1) 90 { 91 if (match_icase) 92 { 93 char *case_buf = mbtolower (buf, &size, &map); 94 if (start_ptr) 95 start_ptr = case_buf + (start_ptr - buf); 96 buf = case_buf; 97 } 98 } 99 100 for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) 101 { 102 size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); 103 if (offset == (size_t) -1) 104 goto failure; 105 len = kwsmatch.size[0]; 106 if (MB_CUR_MAX > 1 107 && is_mb_middle (&mb_start, beg + offset, buf + size, len)) 108 { 109 /* The match was a part of multibyte character, advance at least 110 one byte to ensure no infinite loop happens. */ 111 mbstate_t s; 112 memset (&s, 0, sizeof s); 113 size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s); 114 if (mb_len == (size_t) -2) 115 goto failure; 116 beg = mb_start; 117 if (mb_len != (size_t) -1) 118 beg += mb_len - 1; 119 continue; 120 } 121 beg += offset; 122 if (start_ptr && !match_words) 123 goto success_in_beg_and_len; 124 if (match_lines) 125 { 126 if (beg > buf && beg[-1] != eol) 127 continue; 128 if (beg + len < buf + size && beg[len] != eol) 129 continue; 130 goto success; 131 } 132 else if (match_words) 133 for (try = beg; ; ) 134 { 135 if (try > buf && WCHAR((unsigned char) try[-1])) 136 break; 137 if (try + len < buf + size && WCHAR((unsigned char) try[len])) 138 { 139 if (!len) 140 break; 141 offset = kwsexec (kwset, beg, --len, &kwsmatch); 142 if (offset == (size_t) -1) 143 break; 144 try = beg + offset; 145 len = kwsmatch.size[0]; 146 } 147 else if (!start_ptr) 148 goto success; 149 else 150 goto success_in_beg_and_len; 151 } /* for (try) */ 152 else 153 goto success; 154 } /* for (beg in buf) */ 155 156 failure: 157 ret_val = -1; 158 goto out; 159 160 success: 161 if ((end = memchr (beg + len, eol, (buf + size) - (beg + len))) != NULL) 162 end++; 163 else 164 end = buf + size; 165 while (buf < beg && beg[-1] != eol) 166 --beg; 167 len = end - beg; 168 success_in_beg_and_len:; 169 size_t off = beg - buf; 170 mb_case_map_apply (map, &off, &len); 171 172 *match_size = len; 173 ret_val = off; 174 out: 175 return ret_val; 176 } 177