1 /* 2 * PROJECT: ReactOS Kernel - Vista+ APIs 3 * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later) 4 * PURPOSE: Rtl functions of Vista+ 5 * COPYRIGHT: 2016 Thomas Faber <thomas.faber@reactos.org> 6 */ 7 8 /* INCLUDES ******************************************************************/ 9 10 #include <rtl_vista.h> 11 12 #define NDEBUG 13 #include <debug.h> 14 15 /* FUNCTIONS *****************************************************************/ 16 17 /****************************************************************************** 18 * RtlUnicodeToUTF8N [NTDLL.@] 19 */ 20 NTSTATUS NTAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max, 21 ULONG *utf8_bytes_written, 22 const WCHAR *uni_src, ULONG uni_bytes) 23 { 24 NTSTATUS status; 25 ULONG i; 26 ULONG written; 27 ULONG ch; 28 BYTE utf8_ch[4]; 29 ULONG utf8_ch_len; 30 31 if (!uni_src) 32 return STATUS_INVALID_PARAMETER_4; 33 if (!utf8_bytes_written) 34 return STATUS_INVALID_PARAMETER; 35 if (utf8_dest && uni_bytes % sizeof(WCHAR)) 36 return STATUS_INVALID_PARAMETER_5; 37 38 written = 0; 39 status = STATUS_SUCCESS; 40 41 for (i = 0; i < uni_bytes / sizeof(WCHAR); i++) 42 { 43 /* decode UTF-16 into ch */ 44 ch = uni_src[i]; 45 if (ch >= 0xdc00 && ch <= 0xdfff) 46 { 47 ch = 0xfffd; 48 status = STATUS_SOME_NOT_MAPPED; 49 } 50 else if (ch >= 0xd800 && ch <= 0xdbff) 51 { 52 if (i + 1 < uni_bytes / sizeof(WCHAR)) 53 { 54 ch -= 0xd800; 55 ch <<= 10; 56 if (uni_src[i + 1] >= 0xdc00 && uni_src[i + 1] <= 0xdfff) 57 { 58 ch |= uni_src[i + 1] - 0xdc00; 59 ch += 0x010000; 60 i++; 61 } 62 else 63 { 64 ch = 0xfffd; 65 status = STATUS_SOME_NOT_MAPPED; 66 } 67 } 68 else 69 { 70 ch = 0xfffd; 71 status = STATUS_SOME_NOT_MAPPED; 72 } 73 } 74 75 /* encode ch as UTF-8 */ 76 ASSERT(ch <= 0x10ffff); 77 if (ch < 0x80) 78 { 79 utf8_ch[0] = ch & 0x7f; 80 utf8_ch_len = 1; 81 } 82 else if (ch < 0x800) 83 { 84 utf8_ch[0] = 0xc0 | (ch >> 6 & 0x1f); 85 utf8_ch[1] = 0x80 | (ch >> 0 & 0x3f); 86 utf8_ch_len = 2; 87 } 88 else if (ch < 0x10000) 89 { 90 utf8_ch[0] = 0xe0 | (ch >> 12 & 0x0f); 91 utf8_ch[1] = 0x80 | (ch >> 6 & 0x3f); 92 utf8_ch[2] = 0x80 | (ch >> 0 & 0x3f); 93 utf8_ch_len = 3; 94 } 95 else if (ch < 0x200000) 96 { 97 utf8_ch[0] = 0xf0 | (ch >> 18 & 0x07); 98 utf8_ch[1] = 0x80 | (ch >> 12 & 0x3f); 99 utf8_ch[2] = 0x80 | (ch >> 6 & 0x3f); 100 utf8_ch[3] = 0x80 | (ch >> 0 & 0x3f); 101 utf8_ch_len = 4; 102 } 103 104 if (!utf8_dest) 105 { 106 written += utf8_ch_len; 107 continue; 108 } 109 110 if (utf8_bytes_max >= utf8_ch_len) 111 { 112 memcpy(utf8_dest, utf8_ch, utf8_ch_len); 113 utf8_dest += utf8_ch_len; 114 utf8_bytes_max -= utf8_ch_len; 115 written += utf8_ch_len; 116 } 117 else 118 { 119 utf8_bytes_max = 0; 120 status = STATUS_BUFFER_TOO_SMALL; 121 } 122 } 123 124 *utf8_bytes_written = written; 125 return status; 126 } 127 128 129 /****************************************************************************** 130 * RtlUTF8ToUnicodeN [NTDLL.@] 131 */ 132 NTSTATUS NTAPI RtlUTF8ToUnicodeN(WCHAR *uni_dest, ULONG uni_bytes_max, 133 ULONG *uni_bytes_written, 134 const CHAR *utf8_src, ULONG utf8_bytes) 135 { 136 NTSTATUS status; 137 ULONG i, j; 138 ULONG written; 139 ULONG ch; 140 ULONG utf8_trail_bytes; 141 WCHAR utf16_ch[3]; 142 ULONG utf16_ch_len; 143 144 if (!utf8_src) 145 return STATUS_INVALID_PARAMETER_4; 146 if (!uni_bytes_written) 147 return STATUS_INVALID_PARAMETER; 148 149 written = 0; 150 status = STATUS_SUCCESS; 151 152 for (i = 0; i < utf8_bytes; i++) 153 { 154 /* read UTF-8 lead byte */ 155 ch = (BYTE)utf8_src[i]; 156 utf8_trail_bytes = 0; 157 if (ch >= 0xf5) 158 { 159 ch = 0xfffd; 160 status = STATUS_SOME_NOT_MAPPED; 161 } 162 else if (ch >= 0xf0) 163 { 164 ch &= 0x07; 165 utf8_trail_bytes = 3; 166 } 167 else if (ch >= 0xe0) 168 { 169 ch &= 0x0f; 170 utf8_trail_bytes = 2; 171 } 172 else if (ch >= 0xc2) 173 { 174 ch &= 0x1f; 175 utf8_trail_bytes = 1; 176 } 177 else if (ch >= 0x80) 178 { 179 /* overlong or trail byte */ 180 ch = 0xfffd; 181 status = STATUS_SOME_NOT_MAPPED; 182 } 183 184 /* read UTF-8 trail bytes */ 185 if (i + utf8_trail_bytes < utf8_bytes) 186 { 187 for (j = 0; j < utf8_trail_bytes; j++) 188 { 189 if ((utf8_src[i + 1] & 0xc0) == 0x80) 190 { 191 ch <<= 6; 192 ch |= utf8_src[i + 1] & 0x3f; 193 i++; 194 } 195 else 196 { 197 ch = 0xfffd; 198 utf8_trail_bytes = 0; 199 status = STATUS_SOME_NOT_MAPPED; 200 break; 201 } 202 } 203 } 204 else 205 { 206 ch = 0xfffd; 207 utf8_trail_bytes = 0; 208 status = STATUS_SOME_NOT_MAPPED; 209 i = utf8_bytes; 210 } 211 212 /* encode ch as UTF-16 */ 213 if ((ch > 0x10ffff) || 214 (ch >= 0xd800 && ch <= 0xdfff) || 215 (utf8_trail_bytes == 2 && ch < 0x00800) || 216 (utf8_trail_bytes == 3 && ch < 0x10000)) 217 { 218 /* invalid codepoint or overlong encoding */ 219 utf16_ch[0] = 0xfffd; 220 utf16_ch[1] = 0xfffd; 221 utf16_ch[2] = 0xfffd; 222 utf16_ch_len = utf8_trail_bytes; 223 status = STATUS_SOME_NOT_MAPPED; 224 } 225 else if (ch >= 0x10000) 226 { 227 /* surrogate pair */ 228 ch -= 0x010000; 229 utf16_ch[0] = 0xd800 + (ch >> 10 & 0x3ff); 230 utf16_ch[1] = 0xdc00 + (ch >> 0 & 0x3ff); 231 utf16_ch_len = 2; 232 } 233 else 234 { 235 /* single unit */ 236 utf16_ch[0] = ch; 237 utf16_ch_len = 1; 238 } 239 240 if (!uni_dest) 241 { 242 written += utf16_ch_len; 243 continue; 244 } 245 246 for (j = 0; j < utf16_ch_len; j++) 247 { 248 if (uni_bytes_max >= sizeof(WCHAR)) 249 { 250 *uni_dest++ = utf16_ch[j]; 251 uni_bytes_max -= sizeof(WCHAR); 252 written++; 253 } 254 else 255 { 256 uni_bytes_max = 0; 257 status = STATUS_BUFFER_TOO_SMALL; 258 } 259 } 260 } 261 262 *uni_bytes_written = written * sizeof(WCHAR); 263 return status; 264 } 265