1 /* Convert multibyte character to wide character.
2    Copyright (C) 1999-2002, 2005-2020 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2008.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #include <config.h>
19 
20 /* Specification.  */
21 #include <wchar.h>
22 
23 #if GNULIB_defined_mbstate_t
24 /* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales
25    and directly for the UTF-8 locales.  */
26 
27 # include <errno.h>
28 # include <stdint.h>
29 # include <stdlib.h>
30 
31 # if defined _WIN32 && !defined __CYGWIN__
32 
33 #  define WIN32_LEAN_AND_MEAN  /* avoid including junk */
34 #  include <windows.h>
35 
36 # elif HAVE_PTHREAD_API
37 
38 #  include <pthread.h>
39 #  if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
40 #   include <threads.h>
41 #   pragma weak thrd_exit
42 #   define c11_threads_in_use() (thrd_exit != NULL)
43 #  else
44 #   define c11_threads_in_use() 0
45 #  endif
46 
47 # elif HAVE_THREADS_H
48 
49 #  include <threads.h>
50 
51 # endif
52 
53 # include "verify.h"
54 # include "lc-charset-dispatch.h"
55 # include "mbtowc-lock.h"
56 
57 # ifndef FALLTHROUGH
58 #  if __GNUC__ < 7
59 #   define FALLTHROUGH ((void) 0)
60 #  else
61 #   define FALLTHROUGH __attribute__ ((__fallthrough__))
62 #  endif
63 # endif
64 
65 verify (sizeof (mbstate_t) >= 4);
66 static char internal_state[4];
67 
68 size_t
mbrtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * ps)69 mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
70 {
71 # define FITS_IN_CHAR_TYPE(wc)  ((wc) <= WCHAR_MAX)
72 # include "mbrtowc-impl.h"
73 }
74 
75 #else
76 /* Override the system's mbrtowc() function.  */
77 
78 # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
79 #  include "hard-locale.h"
80 #  include <locale.h>
81 # endif
82 
83 # undef mbrtowc
84 
/* Replacement for the system's mbrtowc() that works around the platform
   bugs selected through the MBRTOWC_* configuration macros.  Inside this
   function, 'mbrtowc' denotes the system function (the macro override is
   removed by the '# undef mbrtowc' above).

   PWC  where to store the converted wide character.  May be NULL, in
        which case the result goes to a local scratch variable.
   S    the multibyte input.
   N    maximum number of bytes of S to examine.
   PS   conversion state, handed on to the system mbrtowc().

   Returns the system mbrtowc() result, unless one of the enabled
   workarounds below overrides it.  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  size_t ret;
  wchar_t wc;

# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
  /* Rewrite a NULL input as the call (NULL, "", 1), so that the system
     function never sees S == NULL.  */
  if (s == NULL)
    {
      pwc = NULL;
      s = "";
      n = 1;
    }
# endif

# if MBRTOWC_EMPTY_INPUT_BUG
  /* With no bytes available, report an incomplete character without
     consulting the system function.  */
  if (n == 0)
    return (size_t) -2;
# endif

  /* From here on PWC is never NULL: results the caller does not want are
     stored in the local scratch variable.  */
  if (! pwc)
    pwc = &wc;

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t internal_state;

    /* Override mbrtowc's internal state.  We cannot call mbsinit() on the
       hidden internal state, but we can call it on our variable.  */
    if (ps == NULL)
      ps = &internal_state;

    if (!mbsinit (ps))
      {
        /* Parse the rest of the multibyte character byte for byte, counting
           the bytes ourselves instead of relying on the return value.  */
        size_t count = 0;
        for (; n > 0; s++, n--)
          {
            ret = mbrtowc (&wc, s, 1, ps);

            if (ret == (size_t)(-1))
              return (size_t)(-1);
            count++;
            if (ret != (size_t)(-2))
              {
                /* The multibyte character has been completed.  */
                *pwc = wc;
                return (wc == 0 ? 0 : count);
              }
          }
        /* All N bytes were consumed and the character is still
           incomplete.  */
        return (size_t)(-2);
      }
  }
# endif

# if MBRTOWC_STORES_INCOMPLETE_BUG
  /* Convert into the scratch variable and copy the result out only when
     the return value is neither (size_t) -1 nor (size_t) -2.  No NULL
     check is needed: PWC was made non-NULL above.  */
  ret = mbrtowc (&wc, s, n, ps);
  if (ret < (size_t) -2)
    *pwc = wc;
# else
  ret = mbrtowc (pwc, s, n, ps);
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  /* A successfully converted null wide character yields 0.  */
  if (ret < (size_t) -2 && !*pwc)
    return 0;
# endif

# if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
  /* If the conversion returned (size_t) -1 or (size_t) -2 although input
     was available and hard_locale (LC_CTYPE) is false, hand back the first
     byte as a character of its own.  */
  if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))
    {
      unsigned char uc = *s;
      *pwc = uc;
      return 1;
    }
# endif

  return ret;
}
164 
165 #endif
166