1 /* 2 tre_regcomp.c - TRE POSIX compatible regex compilation functions. 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifdef HAVE_CONFIG_H 10 #include <config.h> 11 #endif /* HAVE_CONFIG_H */ 12 13 #include <string.h> 14 #include <errno.h> 15 #include <stdlib.h> 16 17 #include "tre.h" 18 #include "tre-internal.h" 19 #include "xmalloc.h" 20 21 int 22 tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags, 23 locale_t loc) 24 { 25 int ret; 26 #if TRE_WCHAR 27 tre_char_t *wregex; 28 size_t wlen; 29 30 wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); 31 if (wregex == NULL) 32 return REG_ESPACE; 33 34 FIX_LOCALE(loc); 35 36 /* If the current locale uses the standard single byte encoding of 37 characters, we don't do a multibyte string conversion. If we did, 38 many applications which use the default locale would break since 39 the default "C" locale uses the 7-bit ASCII character set, and 40 all characters with the eighth bit set would be considered invalid. */ 41 #if TRE_MULTIBYTE 42 if (TRE_MB_CUR_MAX_L(loc) == 1) 43 #endif /* TRE_MULTIBYTE */ 44 { 45 unsigned int i; 46 const unsigned char *str = (const unsigned char *)regex; 47 tre_char_t *wstr = wregex; 48 49 for (i = 0; i < n; i++) 50 *(wstr++) = *(str++); 51 wlen = n; 52 } 53 #if TRE_MULTIBYTE 54 else 55 { 56 size_t consumed; 57 tre_char_t *wcptr = wregex; 58 #ifdef HAVE_MBSTATE_T 59 mbstate_t state; 60 memset(&state, '\0', sizeof(state)); 61 #endif /* HAVE_MBSTATE_T */ 62 while (n > 0) 63 { 64 consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc); 65 66 switch (consumed) 67 { 68 case 0: 69 if (*regex == '\0') 70 consumed = 1; 71 else 72 { 73 xfree(wregex); 74 return REG_BADPAT; 75 } 76 break; 77 case (size_t)-1: 78 case (size_t)-2: 79 DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); 80 xfree(wregex); 81 return REG_ILLSEQ; 82 } 83 regex += consumed; 84 n -= consumed; 85 wcptr++; 86 } 87 wlen = wcptr - wregex; 88 } 89 #endif /* TRE_MULTIBYTE */ 90 91 wregex[wlen] = L'\0'; 92 ret = tre_compile(preg, wregex, wlen, cflags, loc); 93 xfree(wregex); 94 #else /* !TRE_WCHAR */ 95 FIX_LOCALE(loc); 96 ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags, loc); 97 #endif /* !TRE_WCHAR */ 98 99 return ret; 100 } 101 102 int 103 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags) 104 { 105 return tre_regncomp_l(preg, regex, n, cflags, __get_locale()); 106 } 107 108 int 109 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc) 110 { 111 size_t len; 112 113 if (cflags & REG_PEND) 114 { 115 if ((const char *)(preg->re_endp) < regex) 116 return REG_INVARG; 117 len = (const char *)(preg->re_endp) - regex; 118 } 119 else 120 len = strlen(regex); 121 return tre_regncomp_l(preg, regex, len, cflags, loc); 122 } 123 124 int 125 tre_regcomp(regex_t *preg, const char *regex, int cflags) 126 { 127 return tre_regcomp_l(preg, regex, cflags, __get_locale()); 128 } 129 130 #ifdef TRE_WCHAR 131 int 132 tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t n, int cflags, 133 locale_t loc) 134 { 135 FIX_LOCALE(loc); 136 return tre_compile(preg, regex, n, cflags, loc); 137 } 138 139 int 140 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags) 141 { 142 return tre_compile(preg, regex, n, cflags, __get_locale()); 143 } 144 145 int 146 tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags, locale_t loc) 147 { 148 FIX_LOCALE(loc); 149 return tre_compile(preg, regex, wcslen(regex), cflags, loc); 150 } 151 152 int 153 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags) 154 { 155 return tre_regwncomp(preg, regex, wcslen(regex), cflags); 156 } 157 #endif /* TRE_WCHAR */ 158 159 void 160 tre_regfree(regex_t *preg) 161 { 162 tre_free(preg); 163 } 164 165 /* EOF */ 166