1 /* searchutils.c - helper subroutines for grep's matchers. 2 Copyright 1992, 1998, 2000, 2007, 2009-2010 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 #include <config.h> 20 #include <assert.h> 21 #include "search.h" 22 23 #define NCHAR (UCHAR_MAX + 1) 24 25 void 26 kwsinit (kwset_t *kwset) 27 { 28 static char trans[NCHAR]; 29 int i; 30 31 if (match_icase 32 #if MBS_SUPPORT 33 && MB_CUR_MAX == 1 34 #endif 35 ) 36 { 37 for (i = 0; i < NCHAR; ++i) 38 trans[i] = tolower (i); 39 40 *kwset = kwsalloc (trans); 41 } 42 else 43 *kwset = kwsalloc (NULL); 44 45 if (!*kwset) 46 xalloc_die (); 47 } 48 49 #if MBS_SUPPORT 50 /* Convert the *N-byte string, BEG, to lowercase, and write the 51 NUL-terminated result into malloc'd storage. Upon success, set *N 52 to the length (in bytes) of the resulting string (not including the 53 trailing NUL byte), and return a pointer to the lowercase string. 54 Upon memory allocation failure, this function exits. 55 Note that on input, *N must be larger than zero. 56 57 Note that while this function returns a pointer to malloc'd storage, 58 the caller must not free it, since this function retains a pointer 59 to the buffer and reuses it on any subsequent call. As a consequence, 60 this function is not thread-safe. */ 61 char * 62 mbtolower (const char *beg, size_t *n) 63 { 64 static char *out; 65 static size_t outalloc; 66 size_t outlen, mb_cur_max; 67 mbstate_t is, os; 68 const char *end; 69 char *p; 70 71 if (*n > outalloc || outalloc == 0) 72 { 73 outalloc = MAX(1, *n); 74 out = xrealloc (out, outalloc); 75 } 76 77 /* appease clang-2.6 */ 78 assert (out); 79 if (*n == 0) 80 return out; 81 82 memset (&is, 0, sizeof (is)); 83 memset (&os, 0, sizeof (os)); 84 end = beg + *n; 85 86 mb_cur_max = MB_CUR_MAX; 87 p = out; 88 outlen = 0; 89 while (beg < end) 90 { 91 wchar_t wc; 92 size_t mbclen = mbrtowc(&wc, beg, end - beg, &is); 93 if (outlen + mb_cur_max >= outalloc) 94 { 95 out = x2nrealloc (out, &outalloc, 1); 96 p = out + outlen; 97 } 98 99 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) 100 { 101 /* An invalid sequence, or a truncated multi-octet character. 102 We treat it as a single-octet character. */ 103 *p++ = *beg++; 104 outlen++; 105 memset (&is, 0, sizeof (is)); 106 memset (&os, 0, sizeof (os)); 107 } 108 else 109 { 110 beg += mbclen; 111 mbclen = wcrtomb (p, towlower ((wint_t) wc), &os); 112 p += mbclen; 113 outlen += mbclen; 114 } 115 } 116 117 *n = p - out; 118 *p = 0; 119 return out; 120 } 121 122 123 bool 124 is_mb_middle (const char **good, const char *buf, const char *end, 125 size_t match_len) 126 { 127 const char *p = *good; 128 const char *prev = p; 129 mbstate_t cur_state; 130 131 /* TODO: can be optimized for UTF-8. */ 132 memset(&cur_state, 0, sizeof(mbstate_t)); 133 while (p < buf) 134 { 135 size_t mbclen = mbrlen(p, end - p, &cur_state); 136 137 /* Store the beginning of the previous complete multibyte character. */ 138 if (mbclen != (size_t) -2) 139 prev = p; 140 141 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) 142 { 143 /* An invalid sequence, or a truncated multibyte character. 144 We treat it as a single byte character. */ 145 mbclen = 1; 146 memset(&cur_state, 0, sizeof cur_state); 147 } 148 p += mbclen; 149 } 150 151 *good = prev; 152 153 if (p > buf) 154 return true; 155 156 /* P == BUF here. */ 157 return 0 < match_len && match_len < mbrlen (p, end - p, &cur_state); 158 } 159 #endif /* MBS_SUPPORT */ 160