1 /* Convert multibyte character to wide character.
2    Copyright (C) 1999-2002, 2005-2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
18 
19 /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
20    that handles the special case of the UTF-8 encoding.  */
21 
22         /* Cf. unistr/u8-mbtouc.c.  */
23         unsigned char c = (unsigned char) p[0];
24 
25         if (c < 0x80)
26           {
27             if (pwc != NULL)
28               *pwc = c;
29             res = (c == 0 ? 0 : 1);
30             goto success;
31           }
32         if (c >= 0xc2)
33           {
34             if (c < 0xe0)
35               {
36                 if (m == 1)
37                   goto incomplete;
38                 else /* m >= 2 */
39                   {
40                     unsigned char c2 = (unsigned char) p[1];
41 
42                     if ((c2 ^ 0x80) < 0x40)
43                       {
44                         if (pwc != NULL)
45                           *pwc = ((unsigned int) (c & 0x1f) << 6)
46                                  | (unsigned int) (c2 ^ 0x80);
47                         res = 2;
48                         goto success;
49                       }
50                   }
51               }
52             else if (c < 0xf0)
53               {
54                 if (m == 1)
55                   goto incomplete;
56                 else
57                   {
58                     unsigned char c2 = (unsigned char) p[1];
59 
60                     if ((c2 ^ 0x80) < 0x40
61                         && (c >= 0xe1 || c2 >= 0xa0)
62                         && (c != 0xed || c2 < 0xa0))
63                       {
64                         if (m == 2)
65                           goto incomplete;
66                         else /* m >= 3 */
67                           {
68                             unsigned char c3 = (unsigned char) p[2];
69 
70                             if ((c3 ^ 0x80) < 0x40)
71                               {
72                                 unsigned int wc =
73                                   (((unsigned int) (c & 0x0f) << 12)
74                                    | ((unsigned int) (c2 ^ 0x80) << 6)
75                                    | (unsigned int) (c3 ^ 0x80));
76 
77                                 if (FITS_IN_CHAR_TYPE (wc))
78                                   {
79                                     if (pwc != NULL)
80                                       *pwc = wc;
81                                     res = 3;
82                                     goto success;
83                                   }
84                               }
85                           }
86                       }
87                   }
88               }
89             else if (c <= 0xf4)
90               {
91                 if (m == 1)
92                   goto incomplete;
93                 else
94                   {
95                     unsigned char c2 = (unsigned char) p[1];
96 
97                     if ((c2 ^ 0x80) < 0x40
98                         && (c >= 0xf1 || c2 >= 0x90)
99                         && (c < 0xf4 || (c == 0xf4 && c2 < 0x90)))
100                       {
101                         if (m == 2)
102                           goto incomplete;
103                         else
104                           {
105                             unsigned char c3 = (unsigned char) p[2];
106 
107                             if ((c3 ^ 0x80) < 0x40)
108                               {
109                                 if (m == 3)
110                                   goto incomplete;
111                                 else /* m >= 4 */
112                                   {
113                                     unsigned char c4 = (unsigned char) p[3];
114 
115                                     if ((c4 ^ 0x80) < 0x40)
116                                       {
117                                         unsigned int wc =
118                                           (((unsigned int) (c & 0x07) << 18)
119                                            | ((unsigned int) (c2 ^ 0x80) << 12)
120                                            | ((unsigned int) (c3 ^ 0x80) << 6)
121                                            | (unsigned int) (c4 ^ 0x80));
122 
123                                         if (FITS_IN_CHAR_TYPE (wc))
124                                           {
125                                             if (pwc != NULL)
126                                               *pwc = wc;
127                                             res = 4;
128                                             goto success;
129                                           }
130                                       }
131                                   }
132                               }
133                           }
134                       }
135                   }
136               }
137           }
138         goto invalid;
139