1 /* kwsearch.c - searching subroutines using kwset for grep. 2 Copyright 1992, 1998, 2000, 2007, 2009-2014 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 /* Written August 1992 by Mike Haertel. */ 20 21 #include <config.h> 22 #include "search.h" 23 24 /* Whether -w considers WC to be a word constituent. */ 25 static bool 26 wordchar (wint_t wc) 27 { 28 return wc == L'_' || iswalnum (wc); 29 } 30 31 /* KWset compiled pattern. For Ecompile and Gcompile, we compile 32 a list of strings, at least one of which is known to occur in 33 any string matching the regexp. */ 34 static kwset_t kwset; 35 36 void 37 Fcompile (char const *pattern, size_t size) 38 { 39 size_t total = size; 40 mb_len_map_t *map = NULL; 41 char const *pat = (match_icase && MB_CUR_MAX > 1 42 ? mbtoupper (pattern, &total, &map) 43 : pattern); 44 45 kwsinit (&kwset); 46 47 char const *p = pat; 48 do 49 { 50 size_t len; 51 char const *sep = memchr (p, '\n', total); 52 if (sep) 53 { 54 len = sep - p; 55 sep++; 56 total -= (len + 1); 57 } 58 else 59 { 60 len = total; 61 total = 0; 62 } 63 64 char *buf = NULL; 65 if (match_lines) 66 { 67 buf = xmalloc (len + 2); 68 buf[0] = eolbyte; 69 memcpy (buf + 1, p, len); 70 buf[len + 1] = eolbyte; 71 p = buf; 72 len += 2; 73 } 74 kwsincr (kwset, p, len); 75 free (buf); 76 77 p = sep; 78 } 79 while (p); 80 81 kwsprep (kwset); 82 } 83 84 /* Apply the MAP (created by mbtoupper) to the uppercase-buffer-relative 85 *OFF and *LEN, converting them to be relative to the original buffer. */ 86 87 static void 88 mb_case_map_apply (mb_len_map_t const *map, size_t *off, size_t *len) 89 { 90 if (map) 91 { 92 size_t off_incr = 0; 93 size_t len_incr = 0; 94 size_t k; 95 for (k = 0; k < *off; k++) 96 off_incr += map[k]; 97 for (; k < *off + *len; k++) 98 len_incr += map[k]; 99 *off += off_incr; 100 *len += len_incr; 101 } 102 } 103 104 size_t 105 Fexecute (char const *buf, size_t size, size_t *match_size, 106 char const *start_ptr) 107 { 108 char const *beg, *try, *end, *mb_start; 109 size_t len; 110 char eol = eolbyte; 111 struct kwsmatch kwsmatch; 112 size_t ret_val; 113 mb_len_map_t *map = NULL; 114 115 if (MB_CUR_MAX > 1) 116 { 117 if (match_icase) 118 { 119 char *case_buf = mbtoupper (buf, &size, &map); 120 if (start_ptr) 121 start_ptr = case_buf + (start_ptr - buf); 122 buf = case_buf; 123 } 124 } 125 126 for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) 127 { 128 size_t offset = kwsexec (kwset, beg - match_lines, 129 buf + size - beg + match_lines, &kwsmatch); 130 if (offset == (size_t) -1) 131 goto failure; 132 len = kwsmatch.size[0] - match_lines; 133 if (!match_lines && MB_CUR_MAX > 1 && !using_utf8 () 134 && mb_goback (&mb_start, beg + offset, buf + size) != 0) 135 { 136 /* The match was a part of multibyte character, advance at least 137 one byte to ensure no infinite loop happens. */ 138 beg = mb_start; 139 continue; 140 } 141 beg += offset; 142 if (start_ptr && !match_words) 143 goto success_in_beg_and_len; 144 if (match_lines) 145 goto success_in_beg_and_len; 146 if (match_words) 147 for (try = beg; ; ) 148 { 149 if (wordchar (mb_prev_wc (buf, try, buf + size))) 150 break; 151 if (wordchar (mb_next_wc (try + len, buf + size))) 152 { 153 if (!len) 154 break; 155 offset = kwsexec (kwset, beg, --len, &kwsmatch); 156 if (offset == (size_t) -1) 157 break; 158 try = beg + offset; 159 len = kwsmatch.size[0]; 160 } 161 else if (!start_ptr) 162 goto success; 163 else 164 goto success_in_beg_and_len; 165 } /* for (try) */ 166 else 167 goto success; 168 } /* for (beg in buf) */ 169 170 failure: 171 return -1; 172 173 success: 174 if ((end = memchr (beg + len, eol, (buf + size) - (beg + len))) != NULL) 175 end++; 176 else 177 end = buf + size; 178 while (buf < beg && beg[-1] != eol) 179 --beg; 180 len = end - beg; 181 success_in_beg_and_len:; 182 size_t off = beg - buf; 183 mb_case_map_apply (map, &off, &len); 184 185 *match_size = len; 186 ret_val = off; 187 return ret_val; 188 } 189