1 /* kwsearch.c - searching subroutines using kwset for grep. 2 Copyright 1992, 1998, 2000, 2007, 2009-2015 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 /* Written August 1992 by Mike Haertel. */ 20 21 #include <config.h> 22 #include "search.h" 23 24 /* Whether -w considers WC to be a word constituent. */ 25 static bool 26 wordchar (wint_t wc) 27 { 28 return wc == L'_' || iswalnum (wc); 29 } 30 31 /* KWset compiled pattern. For Ecompile and Gcompile, we compile 32 a list of strings, at least one of which is known to occur in 33 any string matching the regexp. */ 34 static kwset_t kwset; 35 36 void 37 Fcompile (char const *pattern, size_t size) 38 { 39 size_t total = size; 40 41 kwsinit (&kwset); 42 43 char const *p = pattern; 44 do 45 { 46 size_t len; 47 char const *sep = memchr (p, '\n', total); 48 if (sep) 49 { 50 len = sep - p; 51 sep++; 52 total -= (len + 1); 53 } 54 else 55 { 56 len = total; 57 total = 0; 58 } 59 60 char *buf = NULL; 61 if (match_lines) 62 { 63 buf = xmalloc (len + 2); 64 buf[0] = eolbyte; 65 memcpy (buf + 1, p, len); 66 buf[len + 1] = eolbyte; 67 p = buf; 68 len += 2; 69 } 70 kwsincr (kwset, p, len); 71 free (buf); 72 73 p = sep; 74 } 75 while (p); 76 77 kwsprep (kwset); 78 } 79 80 size_t 81 Fexecute (char const *buf, size_t size, size_t *match_size, 82 char const *start_ptr) 83 { 84 char const *beg, *try, *end, *mb_start; 85 size_t len; 86 char eol = eolbyte; 87 struct kwsmatch kwsmatch; 88 size_t ret_val; 89 90 for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) 91 { 92 size_t offset = kwsexec (kwset, beg - match_lines, 93 buf + size - beg + match_lines, &kwsmatch); 94 if (offset == (size_t) -1) 95 goto failure; 96 len = kwsmatch.size[0] - 2 * match_lines; 97 if (!match_lines && MB_CUR_MAX > 1 && !using_utf8 () 98 && mb_goback (&mb_start, beg + offset, buf + size) != 0) 99 { 100 /* We have matched a single byte that is not at the beginning of a 101 multibyte character. mb_goback has advanced MB_START past that 102 multibyte character. Now, we want to position BEG so that the 103 next kwsexec search starts there. Thus, to compensate for the 104 for-loop's BEG++, above, subtract one here. This code is 105 unusually hard to reach, and exceptionally, let's show how to 106 trigger it here: 107 108 printf '\203AA\n'|LC_ALL=ja_JP.SHIFT_JIS src/grep -F A 109 110 That assumes the named locale is installed. 111 Note that your system's shift-JIS locale may have a different 112 name, possibly including "sjis". */ 113 beg = mb_start - 1; 114 continue; 115 } 116 beg += offset; 117 if (start_ptr && !match_words) 118 goto success_in_beg_and_len; 119 if (match_lines) 120 { 121 len += start_ptr == NULL; 122 goto success_in_beg_and_len; 123 } 124 if (match_words) 125 for (try = beg; ; ) 126 { 127 char const *bol = memrchr (buf, eol, beg - buf); 128 bol = bol ? bol + 1 : buf; 129 if (wordchar (mb_prev_wc (bol, try, buf + size))) 130 break; 131 if (wordchar (mb_next_wc (try + len, buf + size))) 132 { 133 if (!len) 134 break; 135 offset = kwsexec (kwset, beg, --len, &kwsmatch); 136 if (offset == (size_t) -1) 137 break; 138 try = beg + offset; 139 len = kwsmatch.size[0]; 140 } 141 else if (!start_ptr) 142 goto success; 143 else 144 goto success_in_beg_and_len; 145 } /* for (try) */ 146 else 147 goto success; 148 } /* for (beg in buf) */ 149 150 failure: 151 return -1; 152 153 success: 154 end = memchr (beg + len, eol, (buf + size) - (beg + len)); 155 end = end ? end + 1 : buf + size; 156 beg = memrchr (buf, eol, beg - buf); 157 beg = beg ? beg + 1 : buf; 158 len = end - beg; 159 success_in_beg_and_len:; 160 size_t off = beg - buf; 161 162 *match_size = len; 163 ret_val = off; 164 return ret_val; 165 } 166