xref: /netbsd/external/bsd/tre/dist/lib/regcomp.c (revision 6550d01e)
1 /*
2   tre_regcomp.c - TRE POSIX compatible regex compilation functions.
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12 
13 #include <string.h>
14 #include <errno.h>
15 #include <stdlib.h>
16 
17 #include "tre.h"
18 #include "tre-internal.h"
19 #include "xmalloc.h"
20 
21 #ifdef __weak_alias
22 __weak_alias(regcomp,_regcomp)
23 __weak_alias(regfree,_regfree)
24 #endif
25 
26 int
27 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
28 {
29   int ret;
30 #if TRE_WCHAR
31   tre_char_t *wregex;
32   size_t wlen;
33 
34   wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
35   if (wregex == NULL)
36     return REG_ESPACE;
37 
38   /* If the current locale uses the standard single byte encoding of
39      characters, we don't do a multibyte string conversion.  If we did,
40      many applications which use the default locale would break since
41      the default "C" locale uses the 7-bit ASCII character set, and
42      all characters with the eighth bit set would be considered invalid. */
43 #if TRE_MULTIBYTE
44   if (TRE_MB_CUR_MAX == 1)
45 #endif /* TRE_MULTIBYTE */
46     {
47       unsigned int i;
48       const unsigned char *str = (const unsigned char *)regex;
49       tre_char_t *wstr = wregex;
50 
51       for (i = 0; i < n; i++)
52 	*(wstr++) = *(str++);
53       wlen = n;
54     }
55 #if TRE_MULTIBYTE
56   else
57     {
58       int consumed;
59       tre_char_t *wcptr = wregex;
60 #ifdef HAVE_MBSTATE_T
61       mbstate_t state;
62       memset(&state, '\0', sizeof(state));
63 #endif /* HAVE_MBSTATE_T */
64       while (n > 0)
65 	{
66 	  consumed = tre_mbrtowc(wcptr, regex, n, &state);
67 
68 	  switch (consumed)
69 	    {
70 	    case 0:
71 	      if (*regex == '\0')
72 		consumed = 1;
73 	      else
74 		{
75 		  xfree(wregex);
76 		  return REG_BADPAT;
77 		}
78 	      break;
79 	    case -1:
80 	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
81 	      xfree(wregex);
82 	      return REG_BADPAT;
83 	    case -2:
84 	      /* The last character wasn't complete.  Let's not call it a
85 		 fatal error. */
86 	      consumed = n;
87 	      break;
88 	    }
89 	  regex += consumed;
90 	  n -= consumed;
91 	  wcptr++;
92 	}
93       wlen = wcptr - wregex;
94     }
95 #endif /* TRE_MULTIBYTE */
96 
97   wregex[wlen] = L'\0';
98   ret = tre_compile(preg, wregex, wlen, cflags);
99   xfree(wregex);
100 #else /* !TRE_WCHAR */
101   ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags);
102 #endif /* !TRE_WCHAR */
103 
104   return ret;
105 }
106 
107 int
108 tre_regcomp(regex_t *preg, const char *regex, int cflags)
109 {
110   return tre_regncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
111 }
112 
113 
114 #ifdef TRE_WCHAR
115 int
116 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
117 {
118   return tre_compile(preg, regex, n, cflags);
119 }
120 
121 int
122 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
123 {
124   return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags);
125 }
126 #endif /* TRE_WCHAR */
127 
128 void
129 tre_regfree(regex_t *preg)
130 {
131   tre_free(preg);
132 }
133 
134 /* EOF */
135