1 /* searchutils.c - helper subroutines for grep's matchers. 2 Copyright 1992, 1998, 2000, 2007, 2009-2012 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 #include <config.h> 20 #include <assert.h> 21 #include "search.h" 22 23 #define NCHAR (UCHAR_MAX + 1) 24 25 void 26 kwsinit (kwset_t *kwset) 27 { 28 static char trans[NCHAR]; 29 int i; 30 31 if (match_icase && MB_CUR_MAX == 1) 32 { 33 for (i = 0; i < NCHAR; ++i) 34 trans[i] = tolower (i); 35 36 *kwset = kwsalloc (trans); 37 } 38 else 39 *kwset = kwsalloc (NULL); 40 41 if (!*kwset) 42 xalloc_die (); 43 } 44 45 #if MBS_SUPPORT 46 /* Convert the *N-byte string, BEG, to lowercase, and write the 47 NUL-terminated result into malloc'd storage. Upon success, set *N 48 to the length (in bytes) of the resulting string (not including the 49 trailing NUL byte), and return a pointer to the lowercase string. 50 Upon memory allocation failure, this function exits. 51 Note that on input, *N must be larger than zero. 52 53 Note that while this function returns a pointer to malloc'd storage, 54 the caller must not free it, since this function retains a pointer 55 to the buffer and reuses it on any subsequent call. As a consequence, 56 this function is not thread-safe. */ 57 char * 58 mbtolower (const char *beg, size_t *n) 59 { 60 static char *out; 61 static size_t outalloc; 62 size_t outlen, mb_cur_max; 63 mbstate_t is, os; 64 const char *end; 65 char *p; 66 67 if (*n > outalloc || outalloc == 0) 68 { 69 outalloc = MAX(1, *n); 70 out = xrealloc (out, outalloc); 71 } 72 73 /* appease clang-2.6 */ 74 assert (out); 75 if (*n == 0) 76 return out; 77 78 memset (&is, 0, sizeof (is)); 79 memset (&os, 0, sizeof (os)); 80 end = beg + *n; 81 82 mb_cur_max = MB_CUR_MAX; 83 p = out; 84 outlen = 0; 85 while (beg < end) 86 { 87 wchar_t wc; 88 size_t mbclen = mbrtowc(&wc, beg, end - beg, &is); 89 if (outlen + mb_cur_max >= outalloc) 90 { 91 out = x2nrealloc (out, &outalloc, 1); 92 p = out + outlen; 93 } 94 95 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) 96 { 97 /* An invalid sequence, or a truncated multi-octet character. 98 We treat it as a single-octet character. */ 99 *p++ = *beg++; 100 outlen++; 101 memset (&is, 0, sizeof (is)); 102 memset (&os, 0, sizeof (os)); 103 } 104 else 105 { 106 beg += mbclen; 107 mbclen = wcrtomb (p, towlower ((wint_t) wc), &os); 108 p += mbclen; 109 outlen += mbclen; 110 } 111 } 112 113 *n = p - out; 114 *p = 0; 115 return out; 116 } 117 118 119 bool 120 is_mb_middle (const char **good, const char *buf, const char *end, 121 size_t match_len) 122 { 123 const char *p = *good; 124 const char *prev = p; 125 mbstate_t cur_state; 126 127 /* TODO: can be optimized for UTF-8. */ 128 memset(&cur_state, 0, sizeof(mbstate_t)); 129 while (p < buf) 130 { 131 size_t mbclen = mbrlen(p, end - p, &cur_state); 132 133 /* Store the beginning of the previous complete multibyte character. */ 134 if (mbclen != (size_t) -2) 135 prev = p; 136 137 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) 138 { 139 /* An invalid sequence, or a truncated multibyte character. 140 We treat it as a single byte character. */ 141 mbclen = 1; 142 memset(&cur_state, 0, sizeof cur_state); 143 } 144 p += mbclen; 145 } 146 147 *good = prev; 148 149 if (p > buf) 150 return true; 151 152 /* P == BUF here. */ 153 return 0 < match_len && match_len < mbrlen (p, end - p, &cur_state); 154 } 155 #endif /* MBS_SUPPORT */ 156