1 /*
2 * PROJECT: ReactOS Kernel - Vista+ APIs
3 * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4 * PURPOSE: Rtl functions of Vista+
5 * COPYRIGHT: 2016 Thomas Faber <thomas.faber@reactos.org>
6 */
7
8 /* INCLUDES ******************************************************************/
9
10 #include <rtl_vista.h>
11
12 #define NDEBUG
13 #include <debug.h>
14
15 /* FUNCTIONS *****************************************************************/
16
17 /******************************************************************************
18 * RtlUnicodeToUTF8N [NTDLL.@]
19 */
RtlUnicodeToUTF8N(CHAR * utf8_dest,ULONG utf8_bytes_max,ULONG * utf8_bytes_written,const WCHAR * uni_src,ULONG uni_bytes)20 NTSTATUS NTAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max,
21 ULONG *utf8_bytes_written,
22 const WCHAR *uni_src, ULONG uni_bytes)
23 {
24 NTSTATUS status;
25 ULONG i;
26 ULONG written;
27 ULONG ch;
28 BYTE utf8_ch[4];
29 ULONG utf8_ch_len;
30
31 if (!uni_src)
32 return STATUS_INVALID_PARAMETER_4;
33 if (!utf8_bytes_written)
34 return STATUS_INVALID_PARAMETER;
35 if (utf8_dest && uni_bytes % sizeof(WCHAR))
36 return STATUS_INVALID_PARAMETER_5;
37
38 written = 0;
39 status = STATUS_SUCCESS;
40
41 for (i = 0; i < uni_bytes / sizeof(WCHAR); i++)
42 {
43 /* decode UTF-16 into ch */
44 ch = uni_src[i];
45 if (ch >= 0xdc00 && ch <= 0xdfff)
46 {
47 ch = 0xfffd;
48 status = STATUS_SOME_NOT_MAPPED;
49 }
50 else if (ch >= 0xd800 && ch <= 0xdbff)
51 {
52 if (i + 1 < uni_bytes / sizeof(WCHAR))
53 {
54 ch -= 0xd800;
55 ch <<= 10;
56 if (uni_src[i + 1] >= 0xdc00 && uni_src[i + 1] <= 0xdfff)
57 {
58 ch |= uni_src[i + 1] - 0xdc00;
59 ch += 0x010000;
60 i++;
61 }
62 else
63 {
64 ch = 0xfffd;
65 status = STATUS_SOME_NOT_MAPPED;
66 }
67 }
68 else
69 {
70 ch = 0xfffd;
71 status = STATUS_SOME_NOT_MAPPED;
72 }
73 }
74
75 /* encode ch as UTF-8 */
76 ASSERT(ch <= 0x10ffff);
77 if (ch < 0x80)
78 {
79 utf8_ch[0] = ch & 0x7f;
80 utf8_ch_len = 1;
81 }
82 else if (ch < 0x800)
83 {
84 utf8_ch[0] = 0xc0 | (ch >> 6 & 0x1f);
85 utf8_ch[1] = 0x80 | (ch >> 0 & 0x3f);
86 utf8_ch_len = 2;
87 }
88 else if (ch < 0x10000)
89 {
90 utf8_ch[0] = 0xe0 | (ch >> 12 & 0x0f);
91 utf8_ch[1] = 0x80 | (ch >> 6 & 0x3f);
92 utf8_ch[2] = 0x80 | (ch >> 0 & 0x3f);
93 utf8_ch_len = 3;
94 }
95 else if (ch < 0x200000)
96 {
97 utf8_ch[0] = 0xf0 | (ch >> 18 & 0x07);
98 utf8_ch[1] = 0x80 | (ch >> 12 & 0x3f);
99 utf8_ch[2] = 0x80 | (ch >> 6 & 0x3f);
100 utf8_ch[3] = 0x80 | (ch >> 0 & 0x3f);
101 utf8_ch_len = 4;
102 }
103
104 if (!utf8_dest)
105 {
106 written += utf8_ch_len;
107 continue;
108 }
109
110 if (utf8_bytes_max >= utf8_ch_len)
111 {
112 memcpy(utf8_dest, utf8_ch, utf8_ch_len);
113 utf8_dest += utf8_ch_len;
114 utf8_bytes_max -= utf8_ch_len;
115 written += utf8_ch_len;
116 }
117 else
118 {
119 utf8_bytes_max = 0;
120 status = STATUS_BUFFER_TOO_SMALL;
121 }
122 }
123
124 *utf8_bytes_written = written;
125 return status;
126 }
127
128
129 /******************************************************************************
130 * RtlUTF8ToUnicodeN [NTDLL.@]
131 */
RtlUTF8ToUnicodeN(WCHAR * uni_dest,ULONG uni_bytes_max,ULONG * uni_bytes_written,const CHAR * utf8_src,ULONG utf8_bytes)132 NTSTATUS NTAPI RtlUTF8ToUnicodeN(WCHAR *uni_dest, ULONG uni_bytes_max,
133 ULONG *uni_bytes_written,
134 const CHAR *utf8_src, ULONG utf8_bytes)
135 {
136 NTSTATUS status;
137 ULONG i, j;
138 ULONG written;
139 ULONG ch;
140 ULONG utf8_trail_bytes;
141 WCHAR utf16_ch[3];
142 ULONG utf16_ch_len;
143
144 if (!utf8_src)
145 return STATUS_INVALID_PARAMETER_4;
146 if (!uni_bytes_written)
147 return STATUS_INVALID_PARAMETER;
148
149 written = 0;
150 status = STATUS_SUCCESS;
151
152 for (i = 0; i < utf8_bytes; i++)
153 {
154 /* read UTF-8 lead byte */
155 ch = (BYTE)utf8_src[i];
156 utf8_trail_bytes = 0;
157 if (ch >= 0xf5)
158 {
159 ch = 0xfffd;
160 status = STATUS_SOME_NOT_MAPPED;
161 }
162 else if (ch >= 0xf0)
163 {
164 ch &= 0x07;
165 utf8_trail_bytes = 3;
166 }
167 else if (ch >= 0xe0)
168 {
169 ch &= 0x0f;
170 utf8_trail_bytes = 2;
171 }
172 else if (ch >= 0xc2)
173 {
174 ch &= 0x1f;
175 utf8_trail_bytes = 1;
176 }
177 else if (ch >= 0x80)
178 {
179 /* overlong or trail byte */
180 ch = 0xfffd;
181 status = STATUS_SOME_NOT_MAPPED;
182 }
183
184 /* read UTF-8 trail bytes */
185 if (i + utf8_trail_bytes < utf8_bytes)
186 {
187 for (j = 0; j < utf8_trail_bytes; j++)
188 {
189 if ((utf8_src[i + 1] & 0xc0) == 0x80)
190 {
191 ch <<= 6;
192 ch |= utf8_src[i + 1] & 0x3f;
193 i++;
194 }
195 else
196 {
197 ch = 0xfffd;
198 utf8_trail_bytes = 0;
199 status = STATUS_SOME_NOT_MAPPED;
200 break;
201 }
202 }
203 }
204 else
205 {
206 ch = 0xfffd;
207 utf8_trail_bytes = 0;
208 status = STATUS_SOME_NOT_MAPPED;
209 i = utf8_bytes;
210 }
211
212 /* encode ch as UTF-16 */
213 if ((ch > 0x10ffff) ||
214 (ch >= 0xd800 && ch <= 0xdfff) ||
215 (utf8_trail_bytes == 2 && ch < 0x00800) ||
216 (utf8_trail_bytes == 3 && ch < 0x10000))
217 {
218 /* invalid codepoint or overlong encoding */
219 utf16_ch[0] = 0xfffd;
220 utf16_ch[1] = 0xfffd;
221 utf16_ch[2] = 0xfffd;
222 utf16_ch_len = utf8_trail_bytes;
223 status = STATUS_SOME_NOT_MAPPED;
224 }
225 else if (ch >= 0x10000)
226 {
227 /* surrogate pair */
228 ch -= 0x010000;
229 utf16_ch[0] = 0xd800 + (ch >> 10 & 0x3ff);
230 utf16_ch[1] = 0xdc00 + (ch >> 0 & 0x3ff);
231 utf16_ch_len = 2;
232 }
233 else
234 {
235 /* single unit */
236 utf16_ch[0] = ch;
237 utf16_ch_len = 1;
238 }
239
240 if (!uni_dest)
241 {
242 written += utf16_ch_len;
243 continue;
244 }
245
246 for (j = 0; j < utf16_ch_len; j++)
247 {
248 if (uni_bytes_max >= sizeof(WCHAR))
249 {
250 *uni_dest++ = utf16_ch[j];
251 uni_bytes_max -= sizeof(WCHAR);
252 written++;
253 }
254 else
255 {
256 uni_bytes_max = 0;
257 status = STATUS_BUFFER_TOO_SMALL;
258 }
259 }
260 }
261
262 *uni_bytes_written = written * sizeof(WCHAR);
263 return status;
264 }
265