xref: /dragonfly/contrib/tre/lib/regcomp.c (revision c69bf40f)
1 /*
2   tre_regcomp.c - TRE POSIX compatible regex compilation functions.
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12 
13 #include <string.h>
14 #include <errno.h>
15 #include <stdlib.h>
16 
17 #include "tre.h"
18 #include "tre-internal.h"
19 #include "xmalloc.h"
20 
21 int
22 tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags,
23     locale_t loc)
24 {
25   int ret;
26 #if TRE_WCHAR
27   tre_char_t *wregex;
28   size_t wlen;
29 
30   wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
31   if (wregex == NULL)
32     return REG_ESPACE;
33 
34   FIX_LOCALE(loc);
35 
36   /* If the current locale uses the standard single byte encoding of
37      characters, we don't do a multibyte string conversion.  If we did,
38      many applications which use the default locale would break since
39      the default "C" locale uses the 7-bit ASCII character set, and
40      all characters with the eighth bit set would be considered invalid. */
41 #if TRE_MULTIBYTE
42   if (TRE_MB_CUR_MAX_L(loc) == 1)
43 #endif /* TRE_MULTIBYTE */
44     {
45       unsigned int i;
46       const unsigned char *str = (const unsigned char *)regex;
47       tre_char_t *wstr = wregex;
48 
49       for (i = 0; i < n; i++)
50 	*(wstr++) = *(str++);
51       wlen = n;
52     }
53 #if TRE_MULTIBYTE
54   else
55     {
56       size_t consumed;
57       tre_char_t *wcptr = wregex;
58 #ifdef HAVE_MBSTATE_T
59       mbstate_t state;
60       memset(&state, '\0', sizeof(state));
61 #endif /* HAVE_MBSTATE_T */
62       while (n > 0)
63 	{
64 	  consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc);
65 
66 	  switch (consumed)
67 	    {
68 	    case 0:
69 	      if (*regex == '\0')
70 		consumed = 1;
71 	      else
72 		{
73 		  xfree(wregex);
74 		  return REG_BADPAT;
75 		}
76 	      break;
77 	    case (size_t)-1:
78 	    case (size_t)-2:
79 	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
80 	      xfree(wregex);
81 	      return REG_ILLSEQ;
82 	    }
83 	  regex += consumed;
84 	  n -= consumed;
85 	  wcptr++;
86 	}
87       wlen = wcptr - wregex;
88     }
89 #endif /* TRE_MULTIBYTE */
90 
91   wregex[wlen] = L'\0';
92   ret = tre_compile(preg, wregex, wlen, cflags, loc);
93   xfree(wregex);
94 #else /* !TRE_WCHAR */
95   FIX_LOCALE(loc);
96   ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags, loc);
97 #endif /* !TRE_WCHAR */
98 
99   return ret;
100 }
101 
102 int
103 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
104 {
105   return tre_regncomp_l(preg, regex, n, cflags, __get_locale());
106 }
107 
108 int
109 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc)
110 {
111   size_t len;
112 
113   if (cflags & REG_PEND)
114     {
115       if ((const char *)(preg->re_endp) < regex)
116 	return REG_INVARG;
117       len = (const char *)(preg->re_endp) - regex;
118     }
119   else
120     len = strlen(regex);
121   return tre_regncomp_l(preg, regex, len, cflags, loc);
122 }
123 
124 int
125 tre_regcomp(regex_t *preg, const char *regex, int cflags)
126 {
127   return tre_regcomp_l(preg, regex, cflags, __get_locale());
128 }
129 
130 #ifdef TRE_WCHAR
131 int
132 tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t n, int cflags,
133     locale_t loc)
134 {
135   FIX_LOCALE(loc);
136   return tre_compile(preg, regex, n, cflags, loc);
137 }
138 
139 int
140 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
141 {
142   return tre_compile(preg, regex, n, cflags, __get_locale());
143 }
144 
145 int
146 tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags, locale_t loc)
147 {
148   FIX_LOCALE(loc);
149   return tre_compile(preg, regex, wcslen(regex), cflags, loc);
150 }
151 
152 int
153 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
154 {
155   return tre_regwncomp(preg, regex, wcslen(regex), cflags);
156 }
157 #endif /* TRE_WCHAR */
158 
159 void
160 tre_regfree(regex_t *preg)
161 {
162   tre_free(preg);
163 }
164 
165 /* EOF */
166