1 /* 2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> 3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com> 4 * 5 * This file is part of lsp-plugins 6 * Created on: 18 июн. 2018 г. 7 * 8 * lsp-plugins is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * any later version. 12 * 13 * lsp-plugins is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>. 20 */ 21 22 #include <core/io/charset.h> 23 #include <dsp/endian.h> 24 #include <errno.h> 25 #include <stdlib.h> 26 #include <stdio.h> 27 28 namespace lsp 29 { 30 #if defined(PLATFORM_WINDOWS) 31 typedef struct codepage_t 32 { 33 const char *name; 34 size_t codepage; 35 } codepage_t; 36 37 // This is a generated list of codepages supported by Windows, 38 // see script: scripts/perl/core/oi/charset/gen_cp.pl 39 static const codepage_t win_codepages[] = { 40 { "037", 37 }, 41 { "10000", 10000 }, 42 { "10001", 10001 }, 43 { "10002", 10002 }, 44 { "10003", 10003 }, 45 { "10004", 10004 }, 46 { "10005", 10005 }, 47 { "10006", 10006 }, 48 { "10007", 10007 }, 49 { "10008", 10008 }, 50 { "10010", 10010 }, 51 { "10017", 10017 }, 52 { "10021", 10021 }, 53 { "10029", 10029 }, 54 { "10079", 10079 }, 55 { "10081", 10081 }, 56 { "10082", 10082 }, 57 { "1026", 1026 }, 58 { "1047", 1047 }, 59 { "1140", 1140 }, 60 { "1141", 1141 }, 61 { "1142", 1142 }, 62 { "1143", 1143 }, 63 { "1144", 1144 }, 64 { "1145", 1145 }, 65 { "1146", 1146 }, 66 { "1147", 1147 }, 67 { "1148", 1148 }, 68 { "1149", 1149 }, 69 { "1200", 1200 }, 70 { "12000", 12000 }, 71 { "12001", 12001 }, 72 { "1201", 1201 }, 73 { "1250", 1250 }, 74 { "1251", 1251 }, 75 { "1252", 1252 }, 76 { "1253", 1253 }, 77 { "1254", 1254 }, 78 { "1255", 1255 }, 79 { "1256", 1256 }, 80 { "1257", 1257 }, 81 { "1258", 1258 }, 82 { "1361", 1361 }, 83 { "20000", 20000 }, 84 { "20001", 20001 }, 85 { "20002", 20002 }, 86 { "20003", 20003 }, 87 { "20004", 20004 }, 88 { "20005", 20005 }, 89 { "20105", 20105 }, 90 { "20106", 20106 }, 91 { "20107", 20107 }, 92 { "20108", 20108 }, 93 { "20127", 20127 }, 94 { "20261", 20261 }, 95 { "20269", 20269 }, 96 { "20273", 20273 }, 97 { "20277", 20277 }, 98 { "20278", 20278 }, 99 { "20280", 20280 }, 100 { "20284", 20284 }, 101 { "20285", 20285 }, 102 { "20290", 20290 }, 103 { "20297", 20297 }, 104 { "20420", 20420 }, 105 { "20423", 20423 }, 106 { "20424", 20424 }, 107 { "20833", 20833 }, 108 { "20838", 20838 }, 109 { "20866", 20866 }, 110 { "20871", 20871 }, 111 { "20880", 20880 }, 112 { "20905", 20905 }, 113 { "20924", 20924 }, 114 { "20932", 20932 }, 115 { "20936", 20936 }, 116 { "20949", 20949 }, 117 { "21025", 21025 }, 118 { "21027", 21027 }, 119 { "21866", 21866 }, 120 { "28591", 28591 }, 121 { "28592", 28592 }, 122 { "28593", 28593 }, 123 { "28594", 28594 }, 124 { "28595", 28595 }, 125 { "28596", 28596 }, 126 { "28597", 28597 }, 127 { "28598", 28598 }, 128 { "28599", 28599 }, 129 { "28603", 28603 }, 130 { "28605", 28605 }, 131 { "29001", 29001 }, 132 { "37", 37 }, 133 { "38598", 38598 }, 134 { "437", 437 }, 135 { "500", 500 }, 136 { "50220", 50220 }, 137 { "50221", 50221 }, 138 { "50222", 50222 }, 139 { "50225", 50225 }, 140 { "50227", 50227 }, 141 { "50229", 50229 }, 142 { "50930", 50930 }, 143 { "50931", 50931 }, 144 { "50933", 50933 }, 145 { "50935", 50935 }, 146 { "50936", 50936 }, 147 { "50937", 50937 }, 148 { "50939", 50939 }, 149 { "51932", 51932 }, 150 { "51936", 51936 }, 151 { "51949", 51949 }, 152 { "51950", 51950 }, 153 { "52936", 52936 }, 154 { "54936", 54936 }, 155 { "57002", 57002 }, 156 { "57003", 57003 }, 157 { "57004", 57004 }, 158 { "57005", 57005 }, 159 { "57006", 57006 }, 160 { "57007", 57007 }, 161 { "57008", 57008 }, 162 { "57009", 57009 }, 163 { "57010", 57010 }, 164 { "57011", 57011 }, 165 { "65000", 65000 }, 166 { "65001", 65001 }, 167 { "708", 708 }, 168 { "709", 709 }, 169 { "710", 710 }, 170 { "720", 720 }, 171 { "737", 737 }, 172 { "775", 775 }, 173 { "850", 850 }, 174 { "852", 852 }, 175 { "855", 855 }, 176 { "857", 857 }, 177 { "858", 858 }, 178 { "860", 860 }, 179 { "861", 861 }, 180 { "862", 862 }, 181 { "863", 863 }, 182 { "864", 864 }, 183 { "865", 865 }, 184 { "866", 866 }, 185 { "869", 869 }, 186 { "870", 870 }, 187 { "874", 874 }, 188 { "875", 875 }, 189 { "932", 932 }, 190 { "936", 936 }, 191 { "949", 949 }, 192 { "950", 950 }, 193 { "asmo-708", 708 }, 194 { "asmo708", 708 }, 195 { "big5", 950 }, 196 { "cp-1025", 21025 }, 197 { "cp-1250", 1250 }, 198 { "cp-1251", 1251 }, 199 { "cp-1252", 1252 }, 200 { "cp-1253", 1253 }, 201 { "cp-1254", 1254 }, 202 { "cp-1255", 1255 }, 203 { "cp-1256", 1256 }, 204 { "cp-1257", 1257 }, 205 { "cp-1258", 1258 }, 206 { "cp-21027", 21027 }, 207 { "cp-50229", 50229 }, 208 { "cp-50930", 50930 }, 209 { "cp-50931", 50931 }, 210 { "cp-50933", 50933 }, 211 { "cp-50935", 50935 }, 212 { "cp-50936", 50936 }, 213 { "cp-50937", 50937 }, 214 { "cp-50939", 50939 }, 215 { "cp-51950", 51950 }, 216 { "cp-709", 709 }, 217 { "cp-710", 710 }, 218 { "cp-866", 866 }, 219 { "cp-874", 874 }, 220 { "cp-875", 875 }, 221 { "cp1025", 21025 }, 222 { "cp1250", 1250 }, 223 { "cp1251", 1251 }, 224 { "cp1252", 1252 }, 225 { "cp1253", 1253 }, 226 { "cp1254", 1254 }, 227 { "cp1255", 1255 }, 228 { "cp1256", 1256 }, 229 { "cp1257", 1257 }, 230 { "cp1258", 1258 }, 231 { "cp21027", 21027 }, 232 { "cp50229", 50229 }, 233 { "cp50930", 50930 }, 234 { "cp50931", 50931 }, 235 { "cp50933", 50933 }, 236 { "cp50935", 50935 }, 237 { "cp50936", 50936 }, 238 { "cp50937", 50937 }, 239 { "cp50939", 50939 }, 240 { "cp51950", 51950 }, 241 { "cp709", 709 }, 242 { "cp710", 710 }, 243 { "cp866", 866 }, 244 { "cp874", 874 }, 245 { "cp875", 875 }, 246 { "csiso2022jp", 50221 }, 247 { "dos-720", 720 }, 248 { "dos-862", 862 }, 249 { "dos720", 720 }, 250 { "dos862", 862 }, 251 { "euc-cn", 51936 }, 252 { "euc-jp", 51932 }, 253 { "euc-kr", 51949 }, 254 { "gb18030", 54936 }, 255 { "gb2312", 936 }, 256 { "hz-gb-2312", 52936 }, 257 { "hz-gb2312", 52936 }, 258 { "ibm-thai", 20838 }, 259 { "ibm00858", 858 }, 260 { "ibm00924", 20924 }, 261 { "ibm01047", 1047 }, 262 { "ibm01140", 1140 }, 263 { "ibm01141", 1141 }, 264 { "ibm01142", 1142 }, 265 { "ibm01143", 1143 }, 266 { "ibm01144", 1144 }, 267 { "ibm01145", 1145 }, 268 { "ibm01146", 1146 }, 269 { "ibm01147", 1147 }, 270 { "ibm01148", 1148 }, 271 { "ibm01149", 1149 }, 272 { "ibm037", 37 }, 273 { "ibm1026", 1026 }, 274 { "ibm273", 20273 }, 275 { "ibm277", 20277 }, 276 { "ibm278", 20278 }, 277 { "ibm280", 20280 }, 278 { "ibm284", 20284 }, 279 { "ibm285", 20285 }, 280 { "ibm290", 20290 }, 281 { "ibm297", 20297 }, 282 { "ibm420", 20420 }, 283 { "ibm423", 20423 }, 284 { "ibm424", 20424 }, 285 { "ibm437", 437 }, 286 { "ibm500", 500 }, 287 { "ibm737", 737 }, 288 { "ibm775", 775 }, 289 { "ibm850", 850 }, 290 { "ibm852", 852 }, 291 { "ibm855", 855 }, 292 { "ibm857", 857 }, 293 { "ibm860", 860 }, 294 { "ibm861", 861 }, 295 { "ibm863", 863 }, 296 { "ibm864", 864 }, 297 { "ibm865", 865 }, 298 { "ibm869", 869 }, 299 { "ibm870", 870 }, 300 { "ibm871", 20871 }, 301 { "ibm880", 20880 }, 302 { "ibm905", 20905 }, 303 { "iso-2022-jp", 50222 }, 304 { "iso-2022-kr", 50225 }, 305 { "iso-2022jp", 50222 }, 306 { "iso-2022kr", 50225 }, 307 { "iso-8859-1", 28591 }, 308 { "iso-8859-13", 28603 }, 309 { "iso-8859-15", 28605 }, 310 { "iso-8859-2", 28592 }, 311 { "iso-8859-3", 28593 }, 312 { "iso-8859-4", 28594 }, 313 { "iso-8859-5", 28595 }, 314 { "iso-8859-6", 28596 }, 315 { "iso-8859-7", 28597 }, 316 { "iso-8859-8", 28598 }, 317 { "iso-8859-8-i", 38598 }, 318 { "iso-8859-8i", 38598 }, 319 { "iso-8859-9", 28599 }, 320 { "iso2022-jp", 50222 }, 321 { "iso2022-kr", 50225 }, 322 { "iso2022jp", 50222 }, 323 { "iso2022kr", 50225 }, 324 { "iso8859-1", 28591 }, 325 { "iso8859-13", 28603 }, 326 { "iso8859-15", 28605 }, 327 { "iso8859-2", 28592 }, 328 { "iso8859-3", 28593 }, 329 { "iso8859-4", 28594 }, 330 { "iso8859-5", 28595 }, 331 { "iso8859-6", 28596 }, 332 { "iso8859-7", 28597 }, 333 { "iso8859-8", 28598 }, 334 { "iso8859-8-i", 38598 }, 335 { "iso8859-8i", 38598 }, 336 { "iso8859-9", 28599 }, 337 { "johab", 1361 }, 338 { "koi8-r", 20866 }, 339 { "koi8-u", 21866 }, 340 { "koi8r", 20866 }, 341 { "koi8u", 21866 }, 342 { "ks-c-5601-1987", 949 }, 343 { "ks_c_5601-1987", 949 }, 344 { "macintosh", 10000 }, 345 { "shift-jis", 932 }, 346 { "shift_jis", 932 }, 347 { "unicodefffe", 1201 }, 348 { "us-ascii", 20127 }, 349 { "utf-16", 1200 }, 350 { "utf-16be", 1201 }, 351 { "utf-16le", 1200 }, 352 { "utf-32", 12000 }, 353 { "utf-32be", 12001 }, 354 { "utf-32le", 12000 }, 355 { "utf-7", 65000 }, 356 { "utf-8", 65001 }, 357 { "utf16", 1200 }, 358 { "utf16be", 1201 }, 359 { "utf16le", 1200 }, 360 { "utf32", 12000 }, 361 { "utf32be", 12001 }, 362 { "utf32le", 12000 }, 363 { "utf7", 65000 }, 364 { "utf8", 65001 }, 365 { "windows-1250", 1250 }, 366 { "windows-1251", 1251 }, 367 { "windows-1252", 1252 }, 368 { "windows-1253", 1253 }, 369 { "windows-1254", 1254 }, 370 { "windows-1255", 1255 }, 371 { "windows-1256", 1256 }, 372 { "windows-1257", 1257 }, 373 { "windows-1258", 1258 }, 374 { "windows-874", 874 }, 375 { "windows1250", 1250 }, 376 { "windows1251", 1251 }, 377 { "windows1252", 1252 }, 378 { "windows1253", 1253 }, 379 { "windows1254", 1254 }, 380 { "windows1255", 1255 }, 381 { "windows1256", 1256 }, 382 { "windows1257", 1257 }, 383 { "windows1258", 1258 }, 384 { "windows874", 874 }, 385 { "x-chinese-cns", 20000 }, 386 { "x-chinese-eten", 20002 }, 387 { "x-chinese_cns", 20000 }, 388 { "x-cp-20001", 20001 }, 389 { "x-cp-20003", 20003 }, 390 { "x-cp-20004", 20004 }, 391 { "x-cp-20005", 20005 }, 392 { "x-cp-20261", 20261 }, 393 { "x-cp-20269", 20269 }, 394 { "x-cp-20936", 20936 }, 395 { "x-cp-20949", 20949 }, 396 { "x-cp-50227", 50227 }, 397 { "x-cp20001", 20001 }, 398 { "x-cp20003", 20003 }, 399 { "x-cp20004", 20004 }, 400 { "x-cp20005", 20005 }, 401 { "x-cp20261", 20261 }, 402 { "x-cp20269", 20269 }, 403 { "x-cp20936", 20936 }, 404 { "x-cp20949", 20949 }, 405 { "x-cp50227", 50227 }, 406 { "x-ebcdic-koreanextended", 20833 }, 407 { "x-europa", 29001 }, 408 { "x-ia5", 20105 }, 409 { "x-ia5-german", 20106 }, 410 { "x-ia5-norwegian", 20108 }, 411 { "x-ia5-swedish", 20107 }, 412 { "x-ia5german", 20106 }, 413 { "x-ia5norwegian", 20108 }, 414 { "x-ia5swedish", 20107 }, 415 { "x-iscii-as", 57006 }, 416 { "x-iscii-be", 57003 }, 417 { "x-iscii-de", 57002 }, 418 { "x-iscii-gu", 57010 }, 419 { "x-iscii-ka", 57008 }, 420 { "x-iscii-ma", 57009 }, 421 { "x-iscii-or", 57007 }, 422 { "x-iscii-pa", 57011 }, 423 { "x-iscii-ta", 57004 }, 424 { "x-iscii-te", 57005 }, 425 { "x-mac-arabic", 10004 }, 426 { "x-mac-ce", 10029 }, 427 { "x-mac-chinesesimp", 10008 }, 428 { "x-mac-chinesetrad", 10002 }, 429 { "x-mac-croatian", 10082 }, 430 { "x-mac-cyrillic", 10007 }, 431 { "x-mac-greek", 10006 }, 432 { "x-mac-hebrew", 10005 }, 433 { "x-mac-icelandic", 10079 }, 434 { "x-mac-japanese", 10001 }, 435 { "x-mac-korean", 10003 }, 436 { "x-mac-romanian", 10010 }, 437 { "x-mac-thai", 10021 }, 438 { "x-mac-turkish", 10081 }, 439 { "x-mac-ukrainian", 10017 }, 440 { "x_chinese-eten", 20002 } 441 }; 442 get_codepage(LCID locale,bool ansi)443 ssize_t get_codepage(LCID locale, bool ansi) 444 { 445 char buf[32]; 446 447 int res = GetLocaleInfoA(locale, (ansi) ? LOCALE_IDEFAULTANSICODEPAGE : LOCALE_IDEFAULTCODEPAGE, buf, sizeof(buf)-1); 448 if (res == 0) 449 { 450 switch (GetLastError()) 451 { 452 case ERROR_INSUFFICIENT_BUFFER: 453 return -STATUS_NO_MEM; 454 case ERROR_INVALID_FLAGS: 455 case ERROR_INVALID_PARAMETER: 456 return -STATUS_BAD_ARGUMENTS; 457 default: 458 return -STATUS_UNKNOWN_ERR; 459 } 460 } 461 462 errno = 0; 463 ssize_t cp_num = strtol(buf, NULL, 10); 464 if (errno != 0) 465 return -STATUS_UNSUPPORTED_FORMAT; 466 return cp_num; 467 } 468 codepage_from_name(const char * charset)469 ssize_t codepage_from_name(const char *charset) 470 { 471 if (charset != NULL) 472 { 473 // Do lower-case the character set 474 size_t n = strlen(charset) + 1; 475 char *lower = static_cast<char *>(alloca(n)); 476 for (size_t i=0; i<n; ++i) 477 lower[i] = tolower(charset[i]); 478 479 // Perform binary search of character set 480 size_t first = 0, last = sizeof(win_codepages)/sizeof(codepage_t); 481 while (first < last) 482 { 483 size_t middle = (first + last) >> 1; 484 int n = strcmp(lower, win_codepages[middle].name); 485 if (n == 0) 486 return win_codepages[middle].codepage; 487 else if (n < 0) 488 last = middle; 489 else 490 first = middle + 1; 491 } 492 493 return -1; 494 } 495 496 // printf("LOCALE_CUSTOM_DEFAULT = %d\n", int(get_codepage(LOCALE_CUSTOM_DEFAULT))); 497 // printf("LOCALE_USER_DEFAULT = %d\n", int(get_codepage(LOCALE_USER_DEFAULT))); 498 // printf("LOCALE_SYSTEM_DEFAULT = %d\n", int(get_codepage(LOCALE_SYSTEM_DEFAULT))); 499 // printf("LOCALE_CUSTOM_UNSPECIFIED = %d\n", int(get_codepage(LOCALE_CUSTOM_UNSPECIFIED))); 500 // printf("LOCALE_CUSTOM_UI_DEFAULT = %d\n", int(get_codepage(LOCALE_CUSTOM_UI_DEFAULT))); 501 // printf("LOCALE_INVARIANT = %d\n", int(get_codepage(LOCALE_INVARIANT))); 502 // printf("GetConsoleWindow() = %d\n", int(GetConsoleWindow())); 503 // printf("GetConsoleOutputCP() = %d\n", int(GetConsoleOutputCP())); 504 // fflush(stdout); 505 506 // Obtain system character set 507 //ssize_t cp = (GetConsoleWindow() != 0) ? GetConsoleOutputCP() : get_codepage(LOCALE_CUSTOM_DEFAULT); 508 ssize_t cp = get_codepage(LOCALE_CUSTOM_DEFAULT); 509 if (cp < 0) 510 cp = get_codepage(LOCALE_USER_DEFAULT); 511 if (cp < 0) 512 cp = get_codepage(LOCALE_SYSTEM_DEFAULT); 513 return cp; 514 } 515 516 #else 517 iconv_t init_iconv_to_wchar_t(const char *charset) 518 { 519 // Fetch system character set if it is not set 520 if (charset == NULL) 521 { 522 // Save current locale 523 char *current = setlocale(LC_CTYPE, NULL); 524 if (current == NULL) 525 return iconv_t(-1); 526 size_t len = strlen(current) + 1; 527 char *psaved = static_cast<char *>(alloca(len)); 528 ::memcpy(psaved, current, len); 529 charset = psaved; 530 531 // Get system locale 532 current = setlocale(LC_CTYPE, ""); 533 if (current != NULL) 534 current = strchr(current, '.'); 535 536 // Scan for character set 537 if (current != NULL) 538 { 539 len = strlen(current); 540 psaved = static_cast<char *>(alloca(len)); 541 ::memcpy(psaved, ¤t[1], len); 542 } 543 544 // Restore saved locale 545 setlocale(LC_CTYPE, charset); 546 547 // Update locale 548 charset = (current != NULL) ? psaved : "UTF-8"; 549 } 550 551 // Open conversion 552 iconv_t res = iconv_open(__IF_LEBE("UTF-32LE", "UTF-32BE"), charset); 553 if (res != iconv_t(-1)) 554 return res; 555 556 res = iconv_open(__IF_LEBE("UTF-32LE", "UTF-32BE"), "UTF-8"); 557 if (res != iconv_t(-1)) 558 return res; 559 560 return iconv_open("WCHAR_T", "UTF-8"); 561 } 562 563 iconv_t init_iconv_from_wchar_t(const char *charset) 564 { 565 // Fetch system charset if it is not set 566 if (charset == NULL) 567 { 568 // Save current locale 569 char *current = setlocale(LC_CTYPE, NULL); 570 if (current == NULL) 571 return iconv_t(-1); 572 size_t len = strlen(current) + 1; 573 char *psaved = static_cast<char *>(alloca(len)); 574 ::memcpy(psaved, current, len); 575 charset = psaved; 576 577 // Get system locale 578 current = setlocale(LC_CTYPE, ""); 579 if (current != NULL) 580 current = strchr(current, '.'); 581 582 // Scan for character set 583 if (current != NULL) 584 { 585 len = strlen(current); 586 psaved = static_cast<char *>(alloca(len)); 587 ::memcpy(psaved, ¤t[1], len); 588 } 589 590 // Restore saved locale 591 setlocale(LC_CTYPE, charset); 592 593 // Update charset 594 charset = (current != NULL) ? psaved : "UTF-8"; 595 } 596 597 // Open conversion 598 iconv_t res = iconv_open(charset, __IF_LEBE("UTF-32LE", "UTF-32BE")); 599 if (res != iconv_t(-1)) 600 return res; 601 602 res = iconv_open("UTF-8", __IF_LEBE("UTF-32LE", "UTF-32BE")); 603 if (res != iconv_t(-1)) 604 return res; 605 606 return iconv_open("UTF-8", "WCHAR_T"); 607 } 608 #endif 609 610 //------------------------------------------------------------------------- 611 // UTF-16 helper routines read_utf16le_codepoint(const lsp_utf16_t ** str)612 lsp_utf32_t read_utf16le_codepoint(const lsp_utf16_t **str) 613 { 614 uint32_t cp, sc; 615 const lsp_utf16_t *s = *str; 616 617 cp = LE_TO_CPU(*(s++)); 618 if (cp == 0) 619 return cp; 620 621 sc = cp & 0xfc00; 622 if (sc == 0xd800) // cp = Surrogate high 623 { 624 sc = LE_TO_CPU(*s); 625 if ((sc & 0xfc00) == 0xdc00) 626 { 627 ++s; 628 cp = 0x10000 + (((cp & 0x3ff) << 10) | (sc & 0x3ff)); 629 } 630 else 631 cp = 0xfffd; 632 } 633 else if (sc == 0xdc00) // Surrogate low? 634 { 635 sc = LE_TO_CPU(*s); 636 if ((sc & 0xfc00) == 0xd800) 637 { 638 ++s; 639 cp = 0x10000 + (((sc & 0x3ff) << 10) | (cp & 0x3ff)); 640 } 641 else 642 cp = 0xfffd; 643 } 644 645 *str = s; 646 return cp; 647 } 648 read_utf16be_codepoint(const lsp_utf16_t ** str)649 lsp_utf32_t read_utf16be_codepoint(const lsp_utf16_t **str) 650 { 651 uint32_t cp, sc; 652 const lsp_utf16_t *s = *str; 653 654 cp = BE_TO_CPU(*(s++)); 655 if (cp == 0) 656 return cp; 657 658 sc = cp & 0xfc00; 659 if (sc == 0xd800) // cp = Surrogate high 660 { 661 sc = BE_TO_CPU(*s); 662 if ((sc & 0xfc00) == 0xdc00) 663 { 664 ++s; 665 cp = 0x10000 + (((cp & 0x3ff) << 10) | (sc & 0x3ff)); 666 } 667 else 668 cp = 0xfffd; 669 } 670 else if (sc == 0xdc00) // Surrogate low? 671 { 672 sc = BE_TO_CPU(*s); 673 if ((sc & 0xfc00) == 0xd800) 674 { 675 ++s; 676 cp = 0x10000 + (((sc & 0x3ff) << 10) | (cp & 0x3ff)); 677 } 678 else 679 cp = 0xfffd; 680 } 681 682 *str = s; 683 return cp; 684 } 685 read_utf16le_streaming(const lsp_utf16_t ** str,size_t * nsrc,bool force)686 lsp_utf32_t read_utf16le_streaming(const lsp_utf16_t **str, size_t *nsrc, bool force) 687 { 688 if (*nsrc <= 0) 689 return LSP_UTF32_EOF; 690 691 uint32_t cp, sc; 692 const lsp_utf16_t *s = *str; 693 694 cp = LE_TO_CPU(*(s++)); 695 sc = cp & 0xfc00; 696 if (sc == 0xd800) // cp = Surrogate high 697 { 698 if (*nsrc > 1) 699 sc = LE_TO_CPU(*s); 700 else if (force) 701 sc = 0; 702 else 703 return LSP_UTF32_EOF; 704 705 if ((sc & 0xfc00) == 0xdc00) 706 { 707 ++s; 708 cp = 0x10000 + (((cp & 0x3ff) << 10) | (sc & 0x3ff)); 709 } 710 else 711 cp = 0xfffd; 712 } 713 else if (sc == 0xdc00) // Surrogate low? 714 { 715 if (*nsrc > 1) 716 sc = LE_TO_CPU(*s); 717 else if (force) 718 sc = 0; 719 else 720 return LSP_UTF32_EOF; 721 722 if ((sc & 0xfc00) == 0xd800) 723 { 724 ++s; 725 cp = 0x10000 + (((sc & 0x3ff) << 10) | (cp & 0x3ff)); 726 } 727 else 728 cp = 0xfffd; 729 } 730 731 *nsrc -= (s - *str); 732 *str = s; 733 return cp; 734 } 735 read_utf16be_streaming(const lsp_utf16_t ** str,size_t * nsrc,bool force)736 lsp_utf32_t read_utf16be_streaming(const lsp_utf16_t **str, size_t *nsrc, bool force) 737 { 738 if (*nsrc <= 0) 739 return LSP_UTF32_EOF; 740 741 uint32_t cp, sc; 742 const lsp_utf16_t *s = *str; 743 744 cp = BE_TO_CPU(*(s++)); 745 sc = cp & 0xfc00; 746 if (sc == 0xd800) // cp = Surrogate high 747 { 748 if (*nsrc > 1) 749 sc = BE_TO_CPU(*s); 750 else if (force) 751 sc = 0; 752 else 753 return LSP_UTF32_EOF; 754 755 if ((sc & 0xfc00) == 0xdc00) 756 { 757 ++s; 758 cp = 0x10000 + (((cp & 0x3ff) << 10) | (sc & 0x3ff)); 759 } 760 else 761 cp = 0xfffd; 762 } 763 else if (sc == 0xdc00) // Surrogate low? 764 { 765 if (*nsrc > 1) 766 sc = BE_TO_CPU(*s); 767 else if (force) 768 sc = 0; 769 else 770 return LSP_UTF32_EOF; 771 772 if ((sc & 0xfc00) == 0xd800) 773 { 774 ++s; 775 cp = 0x10000 + (((sc & 0x3ff) << 10) | (cp & 0x3ff)); 776 } 777 else 778 cp = 0xfffd; 779 } 780 781 *nsrc -= (s - *str); 782 *str = s; 783 return cp; 784 } 785 sizeof_utf16(lsp_utf32_t cp)786 inline size_t sizeof_utf16(lsp_utf32_t cp) 787 { 788 return (cp < 0x10000) ? 2 : 4; 789 } 790 count_utf16(lsp_utf32_t cp)791 inline size_t count_utf16(lsp_utf32_t cp) 792 { 793 return (cp < 0x10000) ? 1 : 2; 794 } 795 write_utf16le_codepoint(lsp_utf16_t ** str,lsp_utf32_t cp)796 void write_utf16le_codepoint(lsp_utf16_t **str, lsp_utf32_t cp) 797 { 798 lsp_utf16_t *dst = *str; 799 if (cp < 0x10000) 800 *(dst++) = CPU_TO_LE(lsp_utf16_t(cp)); 801 else 802 { 803 cp -= 0x10000; 804 dst[0] = CPU_TO_LE(lsp_utf16_t(0xd800 | (cp >> 10))); 805 dst[1] = CPU_TO_LE(lsp_utf16_t(0xdc00 | (cp & 0x3ff))); 806 dst += 2; 807 } 808 *str = dst; 809 } 810 write_utf16be_codepoint(lsp_utf16_t ** str,lsp_utf32_t cp)811 void write_utf16be_codepoint(lsp_utf16_t **str, lsp_utf32_t cp) 812 { 813 lsp_utf16_t *dst = *str; 814 if (cp < 0x10000) 815 *(dst++) = CPU_TO_BE(lsp_utf16_t(cp)); 816 else 817 { 818 cp -= 0x10000; 819 dst[0] = CPU_TO_BE(lsp_utf16_t(0xd800 | (cp >> 10))); 820 dst[1] = CPU_TO_BE(lsp_utf16_t(0xdc00 | (cp & 0x3ff))); 821 dst += 2; 822 } 823 *str = dst; 824 } 825 826 //------------------------------------------------------------------------- 827 // UTF-8 helper routines read_utf8_codepoint(const char ** str)828 lsp_utf32_t read_utf8_codepoint(const char **str) 829 { 830 lsp_utf32_t cp, sp; 831 size_t bytes; 832 const char *s = *str; 833 834 // Decode primary byte 835 cp = uint8_t(*s); 836 if (cp <= 0x7f) 837 { 838 *str = (cp == 0) ? s : s+1; 839 return cp; 840 } 841 842 ++s; 843 if ((cp & 0xe0) == 0xc0) // 2 bytes: 110xxxxx 10xxxxxx 844 { 845 cp &= 0x1f; 846 bytes = (cp >= 0x02) ? 1 : 0; 847 } 848 else if ((cp & 0xf0) == 0xe0) // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx 849 { 850 cp &= 0x0f; 851 bytes = (cp) ? 2 : 0; 852 } 853 else if ((cp & 0xf8) == 0xf0) // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 854 { 855 cp &= 0x07; 856 bytes = 3; 857 } 858 else 859 bytes = 0; 860 861 // Invalid first byte sequence? 862 if (!bytes) 863 { 864 *str = s; 865 return 0xfffd; 866 } 867 868 // Decode extension bytes 869 for (size_t i=0; i<bytes; ++i) 870 { 871 sp = uint8_t(*s); 872 if ((sp & 0xc0) != 0x80) // Invalid sequence? 873 { 874 *str = (sp == 0) ? s : s+1; 875 return 0xfffd; 876 } 877 cp = (cp << 6) | (sp & 0x3f); 878 ++s; 879 } 880 881 if ((bytes == 3) && (cp < 0x10000)) // Check that 4-byte sequence is valid 882 cp = 0xfffd; 883 else if ((cp >= 0xd800) && (cp < 0xe000)) // Check for surrogates 884 cp = 0xfffd; 885 886 *str = s; 887 return cp; 888 } 889 read_utf8_streaming(const char ** str,size_t * nsrc,bool force)890 lsp_utf32_t read_utf8_streaming(const char **str, size_t *nsrc, bool force) 891 { 892 if (*nsrc <= 0) 893 return LSP_UTF32_EOF; 894 895 lsp_utf32_t cp, sp; 896 size_t bytes; 897 const char *s = *str; 898 899 // Decode primary byte 900 cp = uint8_t(*s); 901 if (cp <= 0x7f) 902 { 903 *str = (cp == 0) ? s : s+1; 904 --(*nsrc); 905 return cp; 906 } 907 908 // Multi-byte sequence 909 ++s; 910 if ((cp & 0xe0) == 0xc0) // 2 bytes: 110xxxxx 10xxxxxx 911 { 912 cp &= 0x1f; 913 bytes = (cp >= 0x02) ? 1 : 0; 914 } 915 else if ((cp & 0xf0) == 0xe0) // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx 916 { 917 cp &= 0x0f; 918 bytes = (cp) ? 2 : 0; 919 } 920 else if ((cp & 0xf8) == 0xf0) // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 921 { 922 cp &= 0x07; 923 bytes = 3; 924 } 925 else 926 bytes = 0; 927 928 // Invalid first byte sequence? 929 if (!bytes) 930 { 931 *str = s; 932 --(*nsrc); 933 return 0xfffd; 934 } 935 else if (bytes >= *nsrc) 936 { 937 if (force) 938 { 939 *nsrc = 0; 940 return 0xfffd; 941 } 942 return LSP_UTF32_EOF; 943 } 944 945 // Decode extension bytes 946 for (size_t i=0; i<bytes; ++i) 947 { 948 sp = uint8_t(*s); 949 if ((sp & 0xc0) != 0x80) // Invalid sequence? 950 { 951 if (sp == 0) 952 ++s; 953 *nsrc -= (s - *str); 954 *str = s; 955 return 0xfffd; 956 } 957 cp = (cp << 6) | (sp & 0x3f); 958 ++s; 959 } 960 961 if ((bytes == 3) && (cp < 0x10000)) // Check that 4-byte sequence is valid 962 cp = 0xfffd; 963 else if ((cp >= 0xd800) && (cp < 0xe000)) // Check for surrogates 964 cp = 0xfffd; 965 966 *nsrc -= (s - *str); 967 *str = s; 968 return cp; 969 } 970 sizeof_utf8(lsp_utf32_t cp)971 inline size_t sizeof_utf8(lsp_utf32_t cp) 972 { 973 if (cp >= 0x800) 974 return ((cp < 0x10000) || (cp >= 0x200000)) ? 3 : 4; 975 else 976 return (cp >= 0x80) ? 2 : 1; 977 } 978 count_utf8(lsp_utf32_t cp)979 inline size_t count_utf8(lsp_utf32_t cp) 980 { 981 if (cp >= 0x800) 982 return ((cp < 0x10000) || (cp >= 0x200000)) ? 3 : 4; 983 else 984 return (cp >= 0x80) ? 2 : 1; 985 } 986 write_utf8_codepoint(char ** str,lsp_utf32_t cp)987 void write_utf8_codepoint(char **str, lsp_utf32_t cp) 988 { 989 char *dst = *str; 990 if (cp >= 0x800) // 3-4 bytes 991 { 992 if (cp < 0x10000) // 3 bytes 993 { 994 dst[0] = (cp >> 12) | 0xe0; 995 dst[1] = ((cp >> 6) & 0x3f) | 0x80; 996 dst[2] = (cp & 0x3f) | 0x80; 997 dst += 3; 998 } 999 else if (cp < 0x200000) // 4 bytes 1000 { 1001 dst[0] = (cp >> 16) | 0xf0; 1002 dst[1] = ((cp >> 12) & 0x3f) | 0x80; 1003 dst[2] = ((cp >> 6) & 0x3f) | 0x80; 1004 dst[3] = (cp & 0x3f) | 0x80; 1005 dst += 4; 1006 } 1007 else // Invalid character, emit 3 bytes of 0xfffd code point value 1008 { 1009 dst[0] = 0xef; 1010 dst[1] = 0xbf; 1011 dst[2] = 0xbd; 1012 dst += 3; 1013 } 1014 } 1015 else // 1-2 bytes 1016 { 1017 if (cp >= 0x80) // 2 bytes 1018 { 1019 dst[0] = (cp >> 6) | 0xc0; 1020 dst[1] = (cp & 0x3f) | 0x80; 1021 dst += 2; 1022 } 1023 else // 1 byte 1024 *(dst++) = char(cp); 1025 } 1026 *str = dst; 1027 } 1028 1029 //------------------------------------------------------------------------- 1030 // UTF-8 non-streaming routines utf8_to_utf16le(const char * str)1031 lsp_utf16_t *utf8_to_utf16le(const char *str) 1032 { 1033 // Estimate number of bytes 1034 lsp_utf32_t cp; 1035 size_t bytes = 0; 1036 const char *p = str; 1037 do 1038 { 1039 cp = read_utf8_codepoint(&p); 1040 bytes += sizeof_utf16(cp); 1041 } while (cp != 0); 1042 1043 // Allocate memory 1044 lsp_utf16_t *utf16 = reinterpret_cast<lsp_utf16_t *>(::malloc(bytes)); 1045 if (utf16 == NULL) 1046 return NULL; 1047 1048 // Perform encoding 1049 lsp_utf16_t *dst = utf16; 1050 p = str; 1051 while ((cp = read_utf8_codepoint(&p)) != 0) 1052 write_utf16le_codepoint(&dst, cp); 1053 *dst = 0; 1054 1055 return utf16; 1056 } 1057 utf8_to_utf16be(const char * str)1058 lsp_utf16_t *utf8_to_utf16be(const char *str) 1059 { 1060 // Estimate number of bytes 1061 lsp_utf32_t cp; 1062 size_t bytes = 0; 1063 const char *p = str; 1064 do 1065 { 1066 cp = read_utf8_codepoint(&p); 1067 bytes += sizeof_utf16(cp); 1068 } while (cp != 0); 1069 1070 // Allocate memory 1071 lsp_utf16_t *utf16 = reinterpret_cast<lsp_utf16_t *>(::malloc(bytes)); 1072 if (utf16 == NULL) 1073 return NULL; 1074 1075 // Perform encoding 1076 lsp_utf16_t *dst = utf16; 1077 p = str; 1078 while ((cp = read_utf8_codepoint(&p)) != 0) 1079 write_utf16be_codepoint(&dst, cp); 1080 *dst = 0; 1081 1082 return utf16; 1083 } 1084 utf8_to_utf32le(const char * str)1085 lsp_utf32_t *utf8_to_utf32le(const char *str) 1086 { 1087 // Estimate number of bytes 1088 lsp_utf32_t cp; 1089 size_t bytes = 0; 1090 const char *p = str; 1091 do 1092 { 1093 cp = read_utf8_codepoint(&p); 1094 bytes += sizeof(lsp_utf32_t); 1095 } while (cp != 0); 1096 1097 // Allocate memory 1098 lsp_utf32_t *utf32 = reinterpret_cast<lsp_utf32_t *>(::malloc(bytes)); 1099 if (utf32 == NULL) 1100 return NULL; 1101 1102 // Perform encoding 1103 lsp_utf32_t *dst = utf32; 1104 p = str; 1105 while ((cp = read_utf8_codepoint(&p)) != 0) 1106 *(dst++) = CPU_TO_LE(cp); 1107 *dst = 0; 1108 1109 return utf32; 1110 } 1111 utf8_to_utf32be(const char * str)1112 lsp_utf32_t *utf8_to_utf32be(const char *str) 1113 { 1114 // Estimate number of bytes 1115 lsp_utf32_t cp; 1116 size_t bytes = 0; 1117 const char *p = str; 1118 do 1119 { 1120 cp = read_utf8_codepoint(&p); 1121 bytes += sizeof(lsp_utf32_t); 1122 } while (cp != 0); 1123 1124 // Allocate memory 1125 lsp_utf32_t *utf32 = reinterpret_cast<lsp_utf32_t *>(::malloc(bytes)); 1126 if (utf32 == NULL) 1127 return NULL; 1128 1129 // Perform encoding 1130 lsp_utf32_t *dst = utf32; 1131 p = str; 1132 while ((cp = read_utf8_codepoint(&p)) != 0) 1133 *(dst++) = CPU_TO_BE(cp); 1134 *dst = 0; 1135 1136 return utf32; 1137 } 1138 1139 //------------------------------------------------------------------------- 1140 // UTF-16 non-streaming routines utf16le_to_utf8(const lsp_utf16_t * str)1141 char *utf16le_to_utf8(const lsp_utf16_t *str) 1142 { 1143 // Estimate number of bytes 1144 lsp_utf32_t cp; 1145 size_t bytes = 0; 1146 const lsp_utf16_t *p = str; 1147 do 1148 { 1149 cp = read_utf16le_codepoint(&p); 1150 bytes += sizeof_utf8(cp); 1151 } while (cp != 0); 1152 1153 // Allocate memory 1154 char *utf8 = reinterpret_cast<char *>(::malloc(bytes)); 1155 if (utf8 == NULL) 1156 return NULL; 1157 1158 // Now perform encoding 1159 char *dst = utf8; 1160 p = str; 1161 while ((cp = read_utf16le_codepoint(&p)) != 0) 1162 write_utf8_codepoint(&dst, cp); 1163 *dst = '\0'; 1164 1165 return utf8; 1166 } 1167 utf16be_to_utf8(const lsp_utf16_t * str)1168 char *utf16be_to_utf8(const lsp_utf16_t *str) 1169 { 1170 // Estimate number of bytes 1171 lsp_utf32_t cp; 1172 size_t bytes = 0; 1173 const lsp_utf16_t *p = str; 1174 do 1175 { 1176 cp = read_utf16be_codepoint(&p); 1177 bytes += sizeof_utf8(cp); 1178 } while (cp != 0); 1179 1180 // Allocate memory 1181 char *utf8 = reinterpret_cast<char *>(::malloc(bytes)); 1182 if (utf8 == NULL) 1183 return NULL; 1184 1185 // Now perform encoding 1186 char *dst = utf8; 1187 p = str; 1188 while ((cp = read_utf16be_codepoint(&p)) != 0) 1189 write_utf8_codepoint(&dst, cp); 1190 *dst = '\0'; 1191 1192 return utf8; 1193 } 1194 utf16le_to_utf32le(const lsp_utf16_t * str)1195 lsp_utf32_t *utf16le_to_utf32le(const lsp_utf16_t *str) 1196 { 1197 // Estimate number of bytes 1198 lsp_utf32_t cp; 1199 size_t bytes = 0; 1200 const lsp_utf16_t *p = str; 1201 do 1202 { 1203 cp = read_utf16le_codepoint(&p); 1204 bytes += sizeof(lsp_utf32_t); 1205 } while (cp != 0); 1206 1207 // Allocate memory 1208 lsp_utf32_t *utf32 = reinterpret_cast<lsp_utf32_t *>(::malloc(bytes)); 1209 if (utf32 == NULL) 1210 return NULL; 1211 1212 // Perform encoding 1213 p = str; 1214 lsp_utf32_t *dst= utf32; 1215 while ((cp = read_utf16le_codepoint(&p)) != 0) 1216 *(dst++) = CPU_TO_LE(cp); 1217 *dst = 0; 1218 1219 return utf32; 1220 } 1221 utf16le_to_utf32be(const lsp_utf16_t * str)1222 lsp_utf32_t *utf16le_to_utf32be(const lsp_utf16_t *str) 1223 { 1224 // Estimate number of bytes 1225 lsp_utf32_t cp; 1226 size_t bytes = 0; 1227 const lsp_utf16_t *p = str; 1228 do 1229 { 1230 cp = read_utf16le_codepoint(&p); 1231 bytes += sizeof(lsp_utf32_t); 1232 } while (cp != 0); 1233 1234 // Allocate memory 1235 lsp_utf32_t *utf32 = reinterpret_cast<lsp_utf32_t *>(::malloc(bytes)); 1236 if (utf32 == NULL) 1237 return NULL; 1238 1239 // Perform encoding 1240 p = str; 1241 lsp_utf32_t *dst= utf32; 1242 while ((cp = read_utf16le_codepoint(&p)) != 0) 1243 *(dst++) = CPU_TO_BE(cp); 1244 *dst = 0; 1245 1246 return utf32; 1247 } 1248 utf16be_to_utf32le(const lsp_utf16_t * str)1249 lsp_utf32_t *utf16be_to_utf32le(const lsp_utf16_t *str) 1250 { 1251 // Estimate number of bytes 1252 lsp_utf32_t cp; 1253 size_t bytes = 0; 1254 const lsp_utf16_t *p = str; 1255 do 1256 { 1257 cp = read_utf16be_codepoint(&p); 1258 bytes += sizeof(lsp_utf32_t); 1259 } while (cp != 0); 1260 1261 // Allocate memory 1262 lsp_utf32_t *utf32 = reinterpret_cast<lsp_utf32_t *>(::malloc(bytes)); 1263 if (utf32 == NULL) 1264 return NULL; 1265 1266 // Perform encoding 1267 p = str; 1268 lsp_utf32_t *dst= utf32; 1269 while ((cp = read_utf16be_codepoint(&p)) != 0) 1270 *(dst++) = CPU_TO_LE(cp); 1271 *dst = 0; 1272 1273 return utf32; 1274 } 1275 utf16be_to_utf32be(const lsp_utf16_t * str)1276 lsp_utf32_t *utf16be_to_utf32be(const lsp_utf16_t *str) 1277 { 1278 // Estimate number of bytes 1279 lsp_utf32_t cp; 1280 size_t bytes = 0; 1281 const lsp_utf16_t *p = str; 1282 do 1283 { 1284 cp = read_utf16be_codepoint(&p); 1285 bytes += sizeof(lsp_utf32_t); 1286 } while (cp != 0); 1287 1288 // Allocate memory 1289 lsp_utf32_t *utf32 = reinterpret_cast<lsp_utf32_t *>(::malloc(bytes)); 1290 if (utf32 == NULL) 1291 return NULL; 1292 1293 // Perform encoding 1294 p = str; 1295 lsp_utf32_t *dst= utf32; 1296 while ((cp = read_utf16be_codepoint(&p)) != 0) 1297 *(dst++) = CPU_TO_BE(cp); 1298 *dst = 0; 1299 1300 return utf32; 1301 } 1302 1303 //------------------------------------------------------------------------- 1304 // UTF-32 non-streaming routines utf32le_to_utf8(const lsp_utf32_t * str)1305 char *utf32le_to_utf8(const lsp_utf32_t *str) 1306 { 1307 lsp_utf32_t cp; 1308 size_t bytes = 0; 1309 const lsp_utf32_t *p = str; 1310 1311 // Estimate length 1312 do 1313 { 1314 cp = LE_TO_CPU(*(p++)); 1315 bytes += sizeof_utf8(cp); 1316 } while (cp != 0); 1317 1318 // Allocate memory 1319 char *utf8 = reinterpret_cast<char *>(::malloc(bytes)); 1320 if (utf8 == NULL) 1321 return NULL; 1322 1323 // Perform encoding 1324 p = str; 1325 char *dst = utf8; 1326 while ((cp = *(p++)) != 0) 1327 write_utf8_codepoint(&dst, cp); 1328 1329 *dst = 0; 1330 return utf8; 1331 } 1332 utf32be_to_utf8(const lsp_utf32_t * str)1333 char *utf32be_to_utf8(const lsp_utf32_t *str) 1334 { 1335 lsp_utf32_t cp; 1336 size_t bytes = 0; 1337 const lsp_utf32_t *p = str; 1338 1339 // Estimate length 1340 do 1341 { 1342 cp = BE_TO_CPU(*(p++)); 1343 bytes += sizeof_utf8(cp); 1344 } while (cp != 0); 1345 1346 // Allocate memory 1347 char *utf8 = reinterpret_cast<char *>(::malloc(bytes)); 1348 if (utf8 == NULL) 1349 return NULL; 1350 1351 // Perform encoding 1352 p = str; 1353 char *dst = utf8; 1354 while ((cp = *(p++)) != 0) 1355 write_utf8_codepoint(&dst, cp); 1356 1357 *dst = 0; 1358 return utf8; 1359 } 1360 utf32le_to_utf16le(const lsp_utf32_t * str)1361 lsp_utf16_t *utf32le_to_utf16le(const lsp_utf32_t *str) 1362 { 1363 lsp_utf32_t cp; 1364 size_t bytes = 0; 1365 const lsp_utf32_t *p = str; 1366 1367 // Estimate length 1368 do 1369 { 1370 cp = LE_TO_CPU(*(p++)); 1371 bytes += sizeof_utf16(cp); 1372 } while (cp != 0); 1373 1374 // Allocate memory 1375 lsp_utf16_t *utf16 = reinterpret_cast<lsp_utf16_t *>(::malloc(bytes)); 1376 if (utf16 == NULL) 1377 return NULL; 1378 1379 // Perform encoding 1380 p = str; 1381 lsp_utf16_t *dst= utf16; 1382 while ((cp = *(p++)) != 0) 1383 write_utf16le_codepoint(&dst, cp); 1384 1385 *dst = 0; 1386 return utf16; 1387 } 1388 utf32le_to_utf16be(const lsp_utf32_t * str)1389 lsp_utf16_t *utf32le_to_utf16be(const lsp_utf32_t *str) 1390 { 1391 lsp_utf32_t cp; 1392 size_t bytes = 0; 1393 const lsp_utf32_t *p = str; 1394 1395 // Estimate length 1396 do 1397 { 1398 cp = LE_TO_CPU(*(p++)); 1399 bytes += sizeof_utf16(cp); 1400 } while (cp != 0); 1401 1402 // Allocate memory 1403 lsp_utf16_t *utf16 = reinterpret_cast<lsp_utf16_t *>(::malloc(bytes)); 1404 if (utf16 == NULL) 1405 return NULL; 1406 1407 // Perform encoding 1408 p = str; 1409 lsp_utf16_t *dst= utf16; 1410 while ((cp = *(p++)) != 0) 1411 write_utf16be_codepoint(&dst, cp); 1412 1413 *dst = 0; 1414 return utf16; 1415 } 1416 utf32be_to_utf16le(const lsp_utf32_t * str)1417 lsp_utf16_t *utf32be_to_utf16le(const lsp_utf32_t *str) 1418 { 1419 lsp_utf32_t cp; 1420 size_t bytes = 0; 1421 const lsp_utf32_t *p = str; 1422 1423 // Estimate length 1424 do 1425 { 1426 cp = BE_TO_CPU(*(p++)); 1427 bytes += sizeof_utf16(cp); 1428 } while (cp != 0); 1429 1430 // Allocate memory 1431 lsp_utf16_t *utf16 = reinterpret_cast<lsp_utf16_t *>(::malloc(bytes)); 1432 if (utf16 == NULL) 1433 return NULL; 1434 1435 // Perform encoding 1436 p = str; 1437 lsp_utf16_t *dst= utf16; 1438 while ((cp = *(p++)) != 0) 1439 write_utf16le_codepoint(&dst, cp); 1440 1441 *dst = 0; 1442 return utf16; 1443 } 1444 utf32be_to_utf16be(const lsp_utf32_t * str)1445 lsp_utf16_t *utf32be_to_utf16be(const lsp_utf32_t *str) 1446 { 1447 lsp_utf32_t cp; 1448 size_t bytes = 0; 1449 const lsp_utf32_t *p = str; 1450 1451 // Estimate length 1452 do 1453 { 1454 cp = BE_TO_CPU(*(p++)); 1455 bytes += sizeof_utf16(cp); 1456 } while (cp != 0); 1457 1458 // Allocate memory 1459 lsp_utf16_t *utf16 = reinterpret_cast<lsp_utf16_t *>(::malloc(bytes)); 1460 if (utf16 == NULL) 1461 return NULL; 1462 1463 // Perform encoding 1464 p = str; 1465 lsp_utf16_t *dst= utf16; 1466 while ((cp = *(p++)) != 0) 1467 write_utf16be_codepoint(&dst, cp); 1468 1469 *dst = 0; 1470 return utf16; 1471 } 1472 1473 //------------------------------------------------------------------------- 1474 // UTF-8 streaming routines utf8_to_utf16le(lsp_utf16_t * dst,size_t * ndst,const char * src,size_t * nsrc,bool force)1475 size_t utf8_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force) 1476 { 1477 lsp_utf32_t cp; 1478 size_t processed = 0; 1479 1480 while (*ndst > 0) 1481 { 1482 // Read code point 1483 size_t nin = *nsrc; 1484 cp = read_utf8_streaming(&src, &nin, force); 1485 if (cp == LSP_UTF32_EOF) // No data ? 1486 break; 1487 1488 // Encode code point 1489 size_t nout = count_utf16(cp); 1490 if (nout > *ndst) 1491 break; 1492 write_utf16le_codepoint(&dst, cp); 1493 *nsrc = nin; 1494 *ndst -= nout; 1495 1496 // Update statistics 1497 ++processed; 1498 } 1499 1500 return processed; 1501 } 1502 utf8_to_utf16be(lsp_utf16_t * dst,size_t * ndst,const char * src,size_t * nsrc,bool force)1503 size_t utf8_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force) 1504 { 1505 lsp_utf32_t cp; 1506 size_t processed = 0; 1507 1508 while (*ndst > 0) 1509 { 1510 // Read code point 1511 size_t nin = *nsrc; 1512 cp = read_utf8_streaming(&src, &nin, force); 1513 if (cp == LSP_UTF32_EOF) // No data ? 1514 break; 1515 1516 // Encode code point 1517 size_t nout = count_utf16(cp); 1518 if (nout > *ndst) 1519 break; 1520 write_utf16be_codepoint(&dst, cp); 1521 *nsrc = nin; 1522 *ndst -= nout; 1523 1524 // Update statistics 1525 ++processed; 1526 } 1527 1528 return processed; 1529 } 1530 utf8_to_utf32le(lsp_utf32_t * dst,size_t * ndst,const char * src,size_t * nsrc,bool force)1531 size_t utf8_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force) 1532 { 1533 lsp_utf32_t cp; 1534 size_t processed = 0; 1535 1536 while (*ndst > 0) 1537 { 1538 // Read code point 1539 size_t nin = *nsrc; 1540 cp = read_utf8_streaming(&src, &nin, force); 1541 if (cp == LSP_UTF32_EOF) // No data ? 1542 break; 1543 1544 // Encode code point 1545 *(dst++) = CPU_TO_LE(cp); 1546 *nsrc = nin; 1547 --(*ndst); 1548 1549 // Update statistics 1550 ++processed; 1551 } 1552 1553 return processed; 1554 } 1555 utf8_to_utf32be(lsp_utf32_t * dst,size_t * ndst,const char * src,size_t * nsrc,bool force)1556 size_t utf8_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force) 1557 { 1558 lsp_utf32_t cp; 1559 size_t processed = 0; 1560 1561 while (*ndst > 0) 1562 { 1563 // Read code point 1564 size_t nin = *nsrc; 1565 cp = read_utf8_streaming(&src, &nin, force); 1566 if (cp == LSP_UTF32_EOF) // No data ? 1567 break; 1568 1569 // Encode code point 1570 *(dst++) = CPU_TO_BE(cp); 1571 *nsrc = nin; 1572 --(*ndst); 1573 1574 // Update statistics 1575 ++processed; 1576 } 1577 1578 return processed; 1579 } 1580 1581 //------------------------------------------------------------------------- 1582 // UTF-16 streaming routines utf16le_to_utf8(char * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)1583 size_t utf16le_to_utf8(char *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) 1584 { 1585 lsp_utf32_t cp; 1586 size_t processed = 0; 1587 1588 while (*ndst > 0) 1589 { 1590 // Read code point 1591 size_t nin = *nsrc; 1592 cp = read_utf16le_streaming(&src, &nin, force); 1593 if (cp == LSP_UTF32_EOF) // No data ? 1594 break; 1595 1596 // Encode code point 1597 size_t nout = count_utf8(cp); 1598 if (nout > *ndst) 1599 break; 1600 write_utf8_codepoint(&dst, cp); 1601 *nsrc = nin; 1602 *ndst -= nout; 1603 1604 // Update statistics 1605 ++processed; 1606 } 1607 1608 return processed; 1609 } 1610 utf16be_to_utf8(char * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)1611 size_t utf16be_to_utf8(char *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) 1612 { 1613 lsp_utf32_t cp; 1614 size_t processed = 0; 1615 1616 while (*ndst > 0) 1617 { 1618 // Read code point 1619 size_t nin = *nsrc; 1620 cp = read_utf16be_streaming(&src, &nin, force); 1621 if (cp == LSP_UTF32_EOF) // No data ? 1622 break; 1623 1624 // Encode code point 1625 size_t nout = count_utf8(cp); 1626 if (nout > *ndst) 1627 break; 1628 write_utf8_codepoint(&dst, cp); 1629 *nsrc = nin; 1630 *ndst -= nout; 1631 1632 // Update statistics 1633 ++processed; 1634 } 1635 1636 return processed; 1637 } 1638 utf16le_to_utf32le(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)1639 size_t utf16le_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) 1640 { 1641 lsp_utf32_t cp; 1642 size_t processed = 0; 1643 1644 while (*ndst > 0) 1645 { 1646 // Read code point 1647 size_t nin = *nsrc; 1648 cp = read_utf16le_streaming(&src, &nin, force); 1649 if (cp == LSP_UTF32_EOF) // No data ? 1650 break; 1651 1652 // Encode code point 1653 *(dst++) = CPU_TO_LE(cp); 1654 *nsrc = nin; 1655 --(*ndst); 1656 1657 // Update statistics 1658 ++processed; 1659 } 1660 1661 return processed; 1662 } 1663 utf16be_to_utf32le(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)1664 size_t utf16be_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) 1665 { 1666 lsp_utf32_t cp; 1667 size_t processed = 0; 1668 1669 while (*ndst > 0) 1670 { 1671 // Read code point 1672 size_t nin = *nsrc; 1673 cp = read_utf16be_streaming(&src, &nin, force); 1674 if (cp == LSP_UTF32_EOF) // No data ? 1675 break; 1676 1677 // Encode code point 1678 *(dst++) = CPU_TO_LE(cp); 1679 *nsrc = nin; 1680 --(*ndst); 1681 1682 // Update statistics 1683 ++processed; 1684 } 1685 1686 return processed; 1687 } 1688 utf16le_to_utf32be(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)1689 size_t utf16le_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) 1690 { 1691 lsp_utf32_t cp; 1692 size_t processed = 0; 1693 1694 while (*ndst > 0) 1695 { 1696 // Read code point 1697 size_t nin = *nsrc; 1698 cp = read_utf16le_streaming(&src, &nin, force); 1699 if (cp == LSP_UTF32_EOF) // No data ? 1700 break; 1701 1702 // Encode code point 1703 *(dst++) = CPU_TO_BE(cp); 1704 *nsrc = nin; 1705 --(*ndst); 1706 1707 // Update statistics 1708 ++processed; 1709 } 1710 1711 return processed; 1712 } 1713 utf16be_to_utf32be(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)1714 size_t utf16be_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) 1715 { 1716 lsp_utf32_t cp; 1717 size_t processed = 0; 1718 1719 while (*ndst > 0) 1720 { 1721 // Read code point 1722 size_t nin = *nsrc; 1723 cp = read_utf16be_streaming(&src, &nin, force); 1724 if (cp == LSP_UTF32_EOF) // No data ? 1725 break; 1726 1727 // Encode code point 1728 *(dst++) = CPU_TO_BE(cp); 1729 *nsrc = nin; 1730 --(*ndst); 1731 1732 // Update statistics 1733 ++processed; 1734 } 1735 1736 return processed; 1737 } 1738 1739 //------------------------------------------------------------------------- 1740 // UTF-32 streaming routines utf32le_to_utf8(char * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)1741 size_t utf32le_to_utf8(char *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) 1742 { 1743 lsp_utf32_t cp; 1744 size_t processed = 0; 1745 1746 while (*ndst > 0) 1747 { 1748 // Read code point 1749 if (*nsrc <= 0) 1750 break; 1751 cp = LE_TO_CPU(*(src++)); 1752 1753 // Encode code point 1754 size_t nout = count_utf8(cp); 1755 if (nout > *ndst) 1756 break; 1757 write_utf8_codepoint(&dst, cp); 1758 --(*nsrc); 1759 *ndst -= nout; 1760 1761 // Update statistics 1762 ++processed; 1763 } 1764 1765 return processed; 1766 } 1767 utf32be_to_utf8(char * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)1768 size_t utf32be_to_utf8(char *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) 1769 { 1770 lsp_utf32_t cp; 1771 size_t processed = 0; 1772 1773 while (*ndst > 0) 1774 { 1775 // Read code point 1776 if (*nsrc <= 0) 1777 break; 1778 cp = BE_TO_CPU(*(src++)); 1779 1780 // Encode code point 1781 size_t nout = count_utf8(cp); 1782 if (nout > *ndst) 1783 break; 1784 write_utf8_codepoint(&dst, cp); 1785 --(*nsrc); 1786 *ndst -= nout; 1787 1788 // Update statistics 1789 ++processed; 1790 } 1791 1792 return processed; 1793 } 1794 utf32le_to_utf16le(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)1795 size_t utf32le_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) 1796 { 1797 lsp_utf32_t cp; 1798 size_t processed = 0; 1799 1800 while (*ndst > 0) 1801 { 1802 // Read code point 1803 if (*nsrc <= 0) 1804 break; 1805 cp = LE_TO_CPU(*(src++)); 1806 1807 // Encode code point 1808 size_t nout = count_utf16(cp); 1809 if (nout > *ndst) 1810 break; 1811 write_utf16le_codepoint(&dst, cp); 1812 --(*nsrc); 1813 *ndst -= nout; 1814 1815 // Update statistics 1816 ++processed; 1817 } 1818 1819 return processed; 1820 } 1821 utf32le_to_utf16be(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)1822 size_t utf32le_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) 1823 { 1824 lsp_utf32_t cp; 1825 size_t processed = 0; 1826 1827 while (*ndst > 0) 1828 { 1829 // Read code point 1830 if (*nsrc <= 0) 1831 break; 1832 cp = LE_TO_CPU(*(src++)); 1833 1834 // Encode code point 1835 size_t nout = count_utf16(cp); 1836 if (nout > *ndst) 1837 break; 1838 write_utf16be_codepoint(&dst, cp); 1839 --(*nsrc); 1840 *ndst -= nout; 1841 1842 // Update statistics 1843 ++processed; 1844 } 1845 1846 return processed; 1847 } 1848 utf32be_to_utf16le(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)1849 size_t utf32be_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) 1850 { 1851 lsp_utf32_t cp; 1852 size_t processed = 0; 1853 1854 while (*ndst > 0) 1855 { 1856 // Read code point 1857 if (*nsrc <= 0) 1858 break; 1859 cp = BE_TO_CPU(*(src++)); 1860 1861 // Encode code point 1862 size_t nout = count_utf16(cp); 1863 if (nout > *ndst) 1864 break; 1865 write_utf16le_codepoint(&dst, cp); 1866 --(*nsrc); 1867 *ndst -= nout; 1868 1869 // Update statistics 1870 ++processed; 1871 } 1872 1873 return processed; 1874 } 1875 utf32be_to_utf16be(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)1876 size_t utf32be_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) 1877 { 1878 lsp_utf32_t cp; 1879 size_t processed = 0; 1880 1881 while (*ndst > 0) 1882 { 1883 // Read code point 1884 if (*nsrc <= 0) 1885 break; 1886 cp = BE_TO_CPU(*(src++)); 1887 1888 // Encode code point 1889 size_t nout = count_utf16(cp); 1890 if (nout > *ndst) 1891 break; 1892 write_utf16be_codepoint(&dst, cp); 1893 --(*nsrc); 1894 *ndst -= nout; 1895 1896 // Update statistics 1897 ++processed; 1898 } 1899 1900 return processed; 1901 } 1902 1903 #if defined(PLATFORM_WINDOWS) multibyte_to_widechar_utf16le(LPCCH src,size_t * nsrc,LPWSTR dst,size_t * ndst)1904 static ssize_t multibyte_to_widechar_utf16le(LPCCH src, size_t *nsrc, LPWSTR dst, size_t *ndst) 1905 { 1906 lsp_wchar_t cp; 1907 ssize_t nconv = 0; 1908 const lsp_utf16_t *xsrc = reinterpret_cast<const lsp_utf16_t *>(src); 1909 size_t nin = (*nsrc) >> 1; 1910 size_t nout = *ndst; 1911 1912 while (nin > 0) 1913 { 1914 // Read code point 1915 size_t xin = nin; 1916 cp = read_utf16le_streaming(&xsrc, &xin, false); 1917 if (cp == LSP_UTF32_EOF) // No data ? 1918 break; 1919 1920 // Check that we have enough space 1921 size_t len = count_utf16(cp); 1922 if (nout < len) 1923 break; 1924 1925 // Write code point 1926 write_utf16_codepoint(&dst, cp); 1927 nin = xin; 1928 nout -= len; 1929 nconv += len; 1930 } 1931 1932 *nsrc = ((*nsrc) & 1) + (nin << 1); 1933 *ndst = nout; 1934 return nconv; 1935 } 1936 multibyte_to_widechar_utf16be(LPCCH src,size_t * nsrc,LPWSTR dst,size_t * ndst)1937 static ssize_t multibyte_to_widechar_utf16be(LPCCH src, size_t *nsrc, LPWSTR dst, size_t *ndst) 1938 { 1939 lsp_wchar_t cp; 1940 ssize_t nconv = 0; 1941 const lsp_utf16_t *xsrc = reinterpret_cast<const lsp_utf16_t *>(src); 1942 size_t nin = (*nsrc) >> 1; 1943 size_t nout = *ndst; 1944 1945 while (nin > 0) 1946 { 1947 // Read code point 1948 size_t xin = nin; 1949 cp = read_utf16be_streaming(&xsrc, &xin, false); 1950 if (cp == LSP_UTF32_EOF) // No data ? 1951 break; 1952 1953 // Check that we have enough space 1954 size_t len = count_utf16(cp); 1955 if (nout < len) 1956 break; 1957 1958 // Write code point 1959 write_utf16_codepoint(&dst, cp); 1960 nin = xin; 1961 nout -= len; 1962 nconv += len; 1963 } 1964 1965 *nsrc = ((*nsrc) & 1) + (nin << 1); 1966 *ndst = nout; 1967 return nconv; 1968 } 1969 est_multibyte_to_widechar_utf16le(LPCCH src,size_t nsrc)1970 static ssize_t est_multibyte_to_widechar_utf16le(LPCCH src, size_t nsrc) 1971 { 1972 lsp_wchar_t cp; 1973 ssize_t nconv = 0; 1974 const lsp_utf16_t *xsrc = reinterpret_cast<const lsp_utf16_t *>(src); 1975 nsrc >>= 1; 1976 1977 while (nsrc > 0) 1978 { 1979 // Read code point 1980 cp = read_utf16le_streaming(&xsrc, &nsrc, false); 1981 if (cp == LSP_UTF32_EOF) // No data ? 1982 break; 1983 1984 // Check that we have enough space 1985 nconv += count_utf16(cp); 1986 } 1987 1988 return nconv; 1989 } 1990 est_multibyte_to_widechar_utf16be(LPCCH src,size_t nsrc)1991 static ssize_t est_multibyte_to_widechar_utf16be(LPCCH src, size_t nsrc) 1992 { 1993 lsp_wchar_t cp; 1994 ssize_t nconv = 0; 1995 const lsp_utf16_t *xsrc = reinterpret_cast<const lsp_utf16_t *>(src); 1996 nsrc >>= 1; 1997 1998 while (nsrc > 0) 1999 { 2000 // Read code point 2001 cp = read_utf16le_streaming(&xsrc, &nsrc, false); 2002 if (cp == LSP_UTF32_EOF) // No data ? 2003 break; 2004 2005 // Check that we have enough space 2006 nconv += count_utf16(cp); 2007 } 2008 2009 return nconv; 2010 } 2011 multibyte_to_widechar_utf32le(LPCCH src,size_t * nsrc,LPWSTR dst,size_t * ndst)2012 static ssize_t multibyte_to_widechar_utf32le(LPCCH src, size_t *nsrc, LPWSTR dst, size_t *ndst) 2013 { 2014 lsp_wchar_t cp; 2015 ssize_t nconv = 0; 2016 const lsp_utf32_t *xsrc = reinterpret_cast<const lsp_utf32_t *>(src); 2017 size_t nin = (*nsrc) >> 2; 2018 size_t nout = *ndst; 2019 2020 while (nin > 0) 2021 { 2022 // Read code point 2023 cp = LE_TO_CPU(*(xsrc++)); 2024 2025 // Check that we have enough space 2026 size_t len = count_utf16(cp); 2027 if (nout < len) 2028 break; 2029 2030 // Write code point 2031 write_utf16_codepoint(&dst, cp); 2032 nin -= 1; 2033 nout -= len; 2034 nconv += len; 2035 } 2036 2037 *nsrc = ((*nsrc) & 3) + (nin << 2); 2038 *ndst = nout; 2039 return nconv; 2040 } 2041 multibyte_to_widechar_utf32be(LPCCH src,size_t * nsrc,LPWSTR dst,size_t * ndst)2042 static ssize_t multibyte_to_widechar_utf32be(LPCCH src, size_t *nsrc, LPWSTR dst, size_t *ndst) 2043 { 2044 lsp_wchar_t cp; 2045 ssize_t nconv = 0; 2046 const lsp_utf32_t *xsrc = reinterpret_cast<const lsp_utf32_t *>(src); 2047 size_t nin = (*nsrc) >> 2; 2048 size_t nout = *ndst; 2049 2050 while (nin > 0) 2051 { 2052 // Read code point 2053 cp = BE_TO_CPU(*(xsrc++)); 2054 2055 // Check that we have enough space 2056 size_t len = count_utf16(cp); 2057 if (nout < len) 2058 break; 2059 2060 // Write code point 2061 write_utf16_codepoint(&dst, cp); 2062 nin -= 1; 2063 nout -= len; 2064 nconv += len; 2065 } 2066 2067 *nsrc = ((*nsrc) & 3) + (nin << 2); 2068 *ndst = nout; 2069 return nconv; 2070 } 2071 est_multibyte_to_widechar_utf32le(LPCCH src,size_t nsrc)2072 static ssize_t est_multibyte_to_widechar_utf32le(LPCCH src, size_t nsrc) 2073 { 2074 lsp_wchar_t cp; 2075 ssize_t nconv = 0; 2076 const lsp_utf32_t *xsrc = reinterpret_cast<const lsp_utf32_t *>(src); 2077 nsrc >>= 2; 2078 2079 while (nsrc > 0) 2080 { 2081 // Read code point 2082 cp = LE_TO_CPU(*(xsrc++)); 2083 nconv += count_utf16(cp); 2084 } 2085 2086 return nconv; 2087 } 2088 est_multibyte_to_widechar_utf32be(LPCCH src,size_t nsrc)2089 static ssize_t est_multibyte_to_widechar_utf32be(LPCCH src, size_t nsrc) 2090 { 2091 lsp_wchar_t cp; 2092 ssize_t nconv = 0; 2093 const lsp_utf32_t *xsrc = reinterpret_cast<const lsp_utf32_t *>(src); 2094 nsrc >>= 2; 2095 2096 while (nsrc > 0) 2097 { 2098 // Read code point 2099 cp = BE_TO_CPU(*(xsrc++)); 2100 nconv += count_utf16(cp); 2101 } 2102 2103 return nconv; 2104 } 2105 multibyte_to_widechar(size_t cp,LPCCH src,size_t * nsrc,LPWSTR dst,size_t * ndst)2106 ssize_t multibyte_to_widechar(size_t cp, LPCCH src, size_t *nsrc, LPWSTR dst, size_t *ndst) 2107 { 2108 ssize_t nconv; 2109 2110 switch (cp) 2111 { 2112 case 1200: // UTF-16LE 2113 nconv = ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) ? 2114 est_multibyte_to_widechar_utf16le(src, *nsrc) : 2115 multibyte_to_widechar_utf16le(src, nsrc, dst, ndst); 2116 break; 2117 case 1201: // UTF-16BE 2118 nconv = ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) ? 2119 est_multibyte_to_widechar_utf16be(src, *nsrc) : 2120 multibyte_to_widechar_utf16be(src, nsrc, dst, ndst); 2121 break; 2122 case 12000: // UTF-32LE 2123 nconv = ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) ? 2124 est_multibyte_to_widechar_utf32le(src, *nsrc) : 2125 multibyte_to_widechar_utf32le(src, nsrc, dst, ndst); 2126 break; 2127 case 12001: // UTF-32BE 2128 nconv = ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) ? 2129 est_multibyte_to_widechar_utf32be(src, *nsrc) : 2130 multibyte_to_widechar_utf32be(src, nsrc, dst, ndst); 2131 break; 2132 default: 2133 // We need just to estimate the size? 2134 if ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) 2135 return ::MultiByteToWideChar(cp, 0, src, *nsrc, 0, 0); 2136 2137 // Do the conversion 2138 nconv = ::MultiByteToWideChar(cp, 0, src, *nsrc, dst, *ndst); 2139 if (nconv == 0) 2140 { 2141 switch (GetLastError()) 2142 { 2143 case ERROR_SUCCESS: 2144 return 0; 2145 case ERROR_INSUFFICIENT_BUFFER: 2146 return -STATUS_NO_MEM; 2147 case ERROR_INVALID_FLAGS: 2148 case ERROR_INVALID_PARAMETER: 2149 return -STATUS_BAD_STATE; 2150 case ERROR_NO_UNICODE_TRANSLATION: 2151 return -STATUS_BAD_LOCALE; 2152 default: 2153 return -STATUS_UNKNOWN_ERR; 2154 } 2155 } 2156 2157 // There are converted characters, analyze output 2158 // If function meets invalid sequence, it replaces the code point with such magic value 2159 // We should know if function has failed 2160 if (dst[nconv-1] == 0xfffd) 2161 --nconv; 2162 2163 if (nconv > 0) 2164 { 2165 // Estimate number of bytes decoded (yep, this is dumb but no way...) 2166 ssize_t nbytes = ::WideCharToMultiByte(cp, 0, dst, nconv, NULL, 0, 0, 0); 2167 if (nbytes <= 0) 2168 return -STATUS_IO_ERROR; 2169 2170 *nsrc -= nbytes; 2171 *ndst -= nconv; 2172 } 2173 2174 break; 2175 } 2176 2177 return nconv; 2178 } 2179 widechar_to_multibyte_utf16le(const lsp_utf16_t * src,size_t * nsrc,char * dst,size_t * ndst)2180 static ssize_t widechar_to_multibyte_utf16le(const lsp_utf16_t *src, size_t *nsrc, char *dst, size_t *ndst) 2181 { 2182 lsp_wchar_t cp; 2183 ssize_t nconv = 0; 2184 size_t nin = *nsrc; 2185 size_t nout = *ndst; 2186 lsp_utf16_t *xdst = reinterpret_cast<lsp_utf16_t *>(dst); 2187 2188 while (nin > 0) 2189 { 2190 size_t xin = nin; 2191 cp = read_utf16_streaming(&src, &xin, false); 2192 if (cp == LSP_UTF32_EOF) // No data ? 2193 break; 2194 2195 // Check that we have enough space 2196 size_t len = sizeof_utf16(cp); 2197 if (nout < len) 2198 break; 2199 2200 // Write code point 2201 write_utf16le_codepoint(&xdst, cp); 2202 nin = xin; 2203 nout -= len; 2204 nconv += len; 2205 } 2206 2207 *nsrc = nin; 2208 *ndst = nout; 2209 return nconv; 2210 } 2211 widechar_to_multibyte_utf16be(const lsp_utf16_t * src,size_t * nsrc,char * dst,size_t * ndst)2212 static ssize_t widechar_to_multibyte_utf16be(const lsp_utf16_t *src, size_t *nsrc, char *dst, size_t *ndst) 2213 { 2214 lsp_wchar_t cp; 2215 ssize_t nconv = 0; 2216 size_t nin = *nsrc; 2217 size_t nout = *ndst; 2218 lsp_utf16_t *xdst = reinterpret_cast<lsp_utf16_t *>(dst); 2219 2220 while (nin > 0) 2221 { 2222 size_t xin = nin; 2223 cp = read_utf16_streaming(&src, &xin, false); 2224 if (cp == LSP_UTF32_EOF) // No data ? 2225 break; 2226 2227 // Check that we have enough space 2228 size_t len = sizeof_utf16(cp); 2229 if (nout < len) 2230 break; 2231 2232 // Write code point 2233 write_utf16be_codepoint(&xdst, cp); 2234 nin = xin; 2235 nout -= len; 2236 nconv += len; 2237 } 2238 2239 *nsrc = nin; 2240 *ndst = nout; 2241 return nconv; 2242 } 2243 est_widechar_to_multibyte_utf16(const lsp_utf16_t * src,size_t nsrc)2244 static ssize_t est_widechar_to_multibyte_utf16(const lsp_utf16_t *src, size_t nsrc) 2245 { 2246 lsp_wchar_t cp; 2247 ssize_t nconv = 0; 2248 2249 while (nsrc > 0) 2250 { 2251 cp = read_utf16_streaming(&src, &nsrc, false); 2252 if (cp == LSP_UTF32_EOF) // No data ? 2253 break; 2254 2255 // Check that we have enough space 2256 nconv += sizeof_utf16(cp); 2257 } 2258 2259 return nconv; 2260 } 2261 widechar_to_multibyte_utf32le(const lsp_utf16_t * src,size_t * nsrc,char * dst,size_t * ndst)2262 static ssize_t widechar_to_multibyte_utf32le(const lsp_utf16_t *src, size_t *nsrc, char *dst, size_t *ndst) 2263 { 2264 lsp_wchar_t cp; 2265 ssize_t nconv = 0; 2266 size_t nin = *nsrc; 2267 size_t nout = *ndst; 2268 lsp_utf32_t *xdst = reinterpret_cast<lsp_utf32_t *>(dst); 2269 2270 while (nin > 0) 2271 { 2272 size_t xin = nin; 2273 cp = read_utf16_streaming(&src, &xin, false); 2274 if (cp == LSP_UTF32_EOF) // No data ? 2275 break; 2276 2277 // Check that we have enough space 2278 if (nout < sizeof(lsp_utf32_t)) 2279 break; 2280 2281 // Write code point 2282 *(xdst++) = CPU_TO_LE(cp); 2283 nin = xin; 2284 nout -= sizeof(lsp_utf32_t); 2285 nconv += sizeof(lsp_utf32_t); 2286 } 2287 2288 *nsrc = nin; 2289 *ndst = nout; 2290 return nconv; 2291 } 2292 widechar_to_multibyte_utf32be(const lsp_utf16_t * src,size_t * nsrc,char * dst,size_t * ndst)2293 static ssize_t widechar_to_multibyte_utf32be(const lsp_utf16_t *src, size_t *nsrc, char *dst, size_t *ndst) 2294 { 2295 lsp_wchar_t cp; 2296 ssize_t nconv = 0; 2297 size_t nin = *nsrc; 2298 size_t nout = *ndst; 2299 lsp_utf32_t *xdst = reinterpret_cast<lsp_utf32_t *>(dst); 2300 2301 while (nin > 0) 2302 { 2303 size_t xin = nin; 2304 cp = read_utf16_streaming(&src, &xin, false); 2305 if (cp == LSP_UTF32_EOF) // No data ? 2306 break; 2307 2308 // Check that we have enough space 2309 if (nout < sizeof(lsp_utf32_t)) 2310 break; 2311 2312 // Write code point 2313 *(xdst++) = CPU_TO_BE(cp); 2314 nin = xin; 2315 nout -= sizeof(lsp_utf32_t); 2316 nconv += sizeof(lsp_utf32_t); 2317 } 2318 2319 *nsrc = nin; 2320 *ndst = nout; 2321 return nconv; 2322 } 2323 est_widechar_to_multibyte_utf32(const lsp_utf16_t * src,size_t nsrc)2324 static ssize_t est_widechar_to_multibyte_utf32(const lsp_utf16_t *src, size_t nsrc) 2325 { 2326 lsp_wchar_t cp; 2327 ssize_t nconv = 0; 2328 2329 while (nsrc > 0) 2330 { 2331 cp = read_utf16_streaming(&src, &nsrc, false); 2332 if (cp == LSP_UTF32_EOF) // No data ? 2333 break; 2334 nconv += sizeof(lsp_utf32_t); 2335 } 2336 2337 return nconv; 2338 } 2339 widechar_to_multibyte_split(const lsp_utf16_t * src,size_t limit)2340 static size_t widechar_to_multibyte_split(const lsp_utf16_t *src, size_t limit) 2341 { 2342 // Estimate the middle of an array 2343 size_t half = limit >> 1; 2344 if (half <= 0) 2345 return half; 2346 2347 // Now scan valid code points until we reach the end of array 2348 lsp_wchar_t cp; 2349 limit = half; 2350 while (true) 2351 { 2352 cp = read_utf16_streaming(&src, &limit, false); 2353 if (cp == LSP_UTF32_EOF) // No data ? 2354 break; 2355 } 2356 2357 // Return the result as middle of array without remained points in limit 2358 return half - limit; 2359 } 2360 widechar_to_multibyte(size_t cp,LPCWCH src,size_t * nsrc,LPSTR dst,size_t * ndst)2361 ssize_t widechar_to_multibyte(size_t cp, LPCWCH src, size_t *nsrc, LPSTR dst, size_t *ndst) 2362 { 2363 ssize_t nconv; 2364 2365 switch (cp) 2366 { 2367 case 1200: // UTF-16LE 2368 nconv = ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) ? 2369 est_widechar_to_multibyte_utf16(src, *nsrc) : 2370 widechar_to_multibyte_utf16le(src, nsrc, dst, ndst); 2371 break; 2372 case 1201: // UTF-16BE 2373 nconv = ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) ? 2374 est_widechar_to_multibyte_utf16(src, *nsrc) : 2375 widechar_to_multibyte_utf16be(src, nsrc, dst, ndst); 2376 break; 2377 case 12000: // UTF-32LE 2378 nconv = ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) ? 2379 est_widechar_to_multibyte_utf32(src, *nsrc) : 2380 widechar_to_multibyte_utf32le(src, nsrc, dst, ndst); 2381 break; 2382 case 12001: // UTF-32BE 2383 nconv = ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) ? 2384 est_widechar_to_multibyte_utf32(src, *nsrc) : 2385 widechar_to_multibyte_utf32be(src, nsrc, dst, ndst); 2386 break; 2387 default: 2388 { 2389 // Just estimate number of characters? 2390 if ((dst == NULL) || (ndst == NULL) || (ssize_t(*ndst) <= 0)) 2391 { 2392 nconv = ::WideCharToMultiByte(cp, 0, src, *nsrc, NULL, 0, 0, FALSE); 2393 if (nconv == 0) 2394 { 2395 switch (::GetLastError()) 2396 { 2397 case ERROR_SUCCESS: 2398 return 0; 2399 case ERROR_INSUFFICIENT_BUFFER: 2400 return -STATUS_NO_MEM; 2401 case ERROR_INVALID_FLAGS: 2402 case ERROR_INVALID_PARAMETER: 2403 return -STATUS_BAD_STATE; 2404 case ERROR_NO_UNICODE_TRANSLATION: 2405 return -STATUS_BAD_LOCALE; 2406 default: 2407 return -STATUS_UNKNOWN_ERR; 2408 } 2409 } 2410 return nconv; 2411 } 2412 2413 // Perform first try 2414 size_t xnsrc = *nsrc; 2415 nconv = ::WideCharToMultiByte(cp, 0, src, xnsrc, dst, *ndst, 0, FALSE); 2416 2417 // Do while conversion is unsuccessful 2418 while (nconv <= 0) 2419 { 2420 // There was a fail, analyze it 2421 switch (::GetLastError()) 2422 { 2423 case ERROR_SUCCESS: 2424 return 0; 2425 case ERROR_INSUFFICIENT_BUFFER: 2426 break; // Will retry with twice lesser input buffer 2427 case ERROR_INVALID_FLAGS: 2428 case ERROR_INVALID_PARAMETER: 2429 return -STATUS_BAD_STATE; 2430 case ERROR_NO_UNICODE_TRANSLATION: 2431 return -STATUS_BAD_LOCALE; 2432 default: 2433 return -STATUS_UNKNOWN_ERR; 2434 } 2435 2436 // Try to twice reduce the buffer size, validate data for surrogates 2437 xnsrc = widechar_to_multibyte_split(src, xnsrc); 2438 if (xnsrc <= 0) 2439 break; 2440 2441 // Perform next conversion try with lesser buffer 2442 nconv = ::WideCharToMultiByte(cp, 0, src, xnsrc, dst, *ndst, 0, FALSE); 2443 } 2444 2445 *ndst -= nconv; 2446 *nsrc -= xnsrc; 2447 } 2448 break; 2449 } 2450 2451 return nconv; 2452 } 2453 #endif /* PLATFORM_WINDOWS */ 2454 } 2455