1 /* 2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> 3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com> 4 * 5 * This file is part of lsp-plugins 6 * Created on: 8 мар. 2019 г. 7 * 8 * lsp-plugins is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * any later version. 12 * 13 * lsp-plugins is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>. 20 */ 21 22 #include <core/io/CharsetDecoder.h> 23 #include <errno.h> 24 25 #define DATA_BUFSIZE 0x1000 26 27 namespace lsp 28 { 29 namespace io 30 { 31 CharsetDecoder()32 CharsetDecoder::CharsetDecoder() 33 { 34 bBuffer = NULL; 35 bBufHead = NULL; 36 bBufTail = NULL; 37 cBuffer = NULL; 38 cBufHead = NULL; 39 cBufTail = NULL; 40 41 #if defined(PLATFORM_WINDOWS) 42 xBuffer = NULL; 43 nCodePage = UINT(-1); 44 #else 45 hIconv = iconv_t(-1); 46 #endif /* PLATFORM_WINDOWS */ 47 } 48 ~CharsetDecoder()49 CharsetDecoder::~CharsetDecoder() 50 { 51 close(); 52 } 53 init(const char * charset)54 status_t CharsetDecoder::init(const char *charset) 55 { 56 #if defined(PLATFORM_WINDOWS) 57 if (nCodePage != UINT(-1)) 58 return STATUS_BAD_STATE; 59 60 ssize_t cp = codepage_from_name(charset); 61 if (cp < 0) 62 return STATUS_BAD_LOCALE; 63 nCodePage = cp; 64 #else 65 if (hIconv != iconv_t(-1)) 66 return STATUS_BAD_STATE; 67 68 iconv_t handle = init_iconv_to_wchar_t(charset); 69 if (handle == iconv_t(-1)) 70 return STATUS_BAD_LOCALE; 71 hIconv = handle; 72 #endif /* PLATFORM_WINDOWS */ 73 74 // Allocate buffer 75 uint8_t *buf= reinterpret_cast<uint8_t *>(::malloc( 76 DATA_BUFSIZE // The byte buffer size 77 + sizeof(lsp_wchar_t) * DATA_BUFSIZE * 2 // The temporary buffer size 78 #if defined(PLATFORM_WINDOWS) 79 + sizeof(lsp_utf16_t) * DATA_BUFSIZE * 2 80 #endif /* PLATFORM_WINDOWS */ 81 )); 82 if (buf == NULL) 83 { 84 close(); 85 return STATUS_NO_MEM; 86 } 87 88 bBuffer = buf; 89 bBufHead = bBuffer; 90 bBufTail = bBuffer; 91 buf += DATA_BUFSIZE; 92 cBuffer = reinterpret_cast<lsp_wchar_t *>(buf); 93 cBufHead = cBuffer; 94 cBufTail = cBuffer; 95 96 #if defined(PLATFORM_WINDOWS) 97 buf += sizeof(lsp_wchar_t) * DATA_BUFSIZE * 2; 98 xBuffer = reinterpret_cast<lsp_utf16_t *>(buf); 99 #endif /* PLATFORM_WINDOWS */ 100 101 return STATUS_OK; 102 } 103 close()104 void CharsetDecoder::close() 105 { 106 if (bBuffer != NULL) 107 { 108 free(bBuffer); 109 110 bBuffer = NULL; 111 bBufHead = NULL; 112 bBufTail = NULL; 113 cBuffer = NULL; 114 cBufHead = NULL; 115 cBufTail = NULL; 116 } 117 118 #ifdef PLATFORM_WINDOWS 119 xBuffer = NULL; 120 nCodePage = UINT(-1); 121 #else 122 if (hIconv != iconv_t(-1)) 123 { 124 ::iconv_close(hIconv); 125 hIconv = iconv_t(-1); 126 } 127 #endif /* PLATFORM_WINDOWS */ 128 } 129 #if 0 130 ssize_t CharsetDecoder::decode(lsp_wchar_t **outbuf, size_t *outleft, void **inbuf, size_t *inleft) 131 { 132 size_t nconv; 133 134 #if defined(PLATFORM_WINDOWS) 135 CHAR *xinbuf = reinterpret_cast<CHAR *>(*inbuf); 136 lsp_wchar_t *xoutbuf= *outbuf; 137 size_t xinleft = *inleft; 138 size_t xoutleft = *outleft; 139 nconv = 0; 140 141 while (xoutleft > 0) 142 { 143 // Is there a data in wchar_t buffer? 144 size_t nbuf = cBufTail - cBufHead; 145 if (nbuf > 0) 146 { 147 size_t nsrc = nbuf; 148 nbuf = utf16_to_utf32(xoutbuf, &xoutleft, cBufHead, &nsrc, false); 149 if (nbuf <= 0) 150 break; 151 152 nconv += nbuf; 153 xoutbuf += nbuf; 154 cBufHead += nbuf; 155 continue; 156 } 157 158 // Fill the rest space with converted UTF-16 data 159 // Each input character can take up to 2 UTF-16 characters, prevent from buffer overflows 160 // We can manipulate only with input buffer size because otherwise we will 161 // get a conversion error from dump MultiByteToWideChar routine 162 // character buffer is guaranteed to be empty 163 size_t xinamount = (xinleft > DATA_BUFSIZE) ? DATA_BUFSIZE : xinleft; 164 size_t bufcw = DATA_BUFSIZE*2; 165 size_t xincw = xinamount; 166 167 ssize_t nchars = multibyte_to_widechar(nCodePage, xinbuf, &xincw, cBuffer, &bufcw); 168 if (nchars <= 0) 169 { 170 if (nconv <= 0) 171 return nchars; 172 break; 173 } 174 175 // Update pointers and data 176 xinamount -= xincw; 177 cBufHead = cBuffer; 178 cBufTail = &cBuffer[nchars]; 179 xinbuf += xinamount; 180 xinleft -= xinamount; 181 } 182 183 // Update pointers and values 184 *outbuf = xoutbuf; 185 *outleft = xoutleft; 186 *inbuf = reinterpret_cast<void *>(xinbuf); 187 *inleft = xinleft; 188 #else 189 char *xinbuf = reinterpret_cast<char *>(*inbuf); 190 char *xoutbuf = reinterpret_cast<char *>(*outbuf); 191 size_t xinleft = *inleft; 192 size_t xoutleft = *outleft * sizeof(lsp_wchar_t); 193 194 // Perform conversion 195 nconv = ::iconv(hIconv, &xinbuf, &xinleft, &xoutbuf, &xoutleft); 196 if (nconv == size_t(-1)) 197 { 198 int code = errno; 199 switch (code) 200 { 201 case E2BIG: 202 case EINVAL: 203 nconv = *outleft - (xoutleft/sizeof(lsp_wchar_t)); 204 break; 205 default: 206 return -STATUS_BAD_FORMAT; 207 } 208 } 209 210 // Update pointers and values 211 *outbuf = reinterpret_cast<lsp_wchar_t *>(xoutbuf); 212 *outleft = xoutleft / sizeof(lsp_wchar_t); 213 *inbuf = reinterpret_cast<void *>(xinbuf); 214 *inleft = xinleft; 215 #endif /* PLATFORM_WINDOWS */ 216 217 return nconv; 218 } 219 #endif 220 prepare_buffer()221 size_t CharsetDecoder::prepare_buffer() 222 { 223 size_t bufsz = bBufTail - bBufHead; 224 if (bufsz > (DATA_BUFSIZE >> 1)) 225 return 0; 226 else if (bBufHead != bBuffer) 227 { 228 if (bufsz > 0) 229 ::memmove(bBuffer, bBufHead, bufsz); 230 231 bBufHead = bBuffer; 232 bBufTail = &bBuffer[bufsz]; 233 } 234 return DATA_BUFSIZE - bufsz; 235 } 236 decode_buffer()237 ssize_t CharsetDecoder::decode_buffer() 238 { 239 // Prepare buffer 240 size_t bufsz = cBufTail - cBufHead; 241 if (bufsz > DATA_BUFSIZE) 242 return bufsz; 243 else if (cBufHead != cBuffer) 244 { 245 if (bufsz > 0) 246 ::memmove(cBuffer, cBufHead, bufsz * sizeof(lsp_wchar_t)); 247 248 cBufHead = cBuffer; 249 cBufTail = &cBuffer[bufsz]; 250 } 251 252 // Is there any data in byte buffer? 253 size_t xinleft = bBufTail - bBufHead; 254 if (!xinleft) 255 return bufsz; 256 257 // Now we can surely decode DATA_BUFSIZE characters 258 #ifdef PLATFORM_WINDOWS 259 // Round 1: Perform native -> UTF-16 decoding 260 CHAR *xinbuf = reinterpret_cast<CHAR *>(bBufHead); 261 size_t nsrc = xinleft; 262 size_t ndst = DATA_BUFSIZE*2; 263 ssize_t nbytes = multibyte_to_widechar(nCodePage, xinbuf, &nsrc, xBuffer, &ndst); 264 if (nbytes <= 0) 265 return nbytes; 266 uint8_t *bhead = &bBufHead[xinleft - nsrc]; 267 268 // Round 2: Perform UTF-16 -> UTF-32 decoding 269 nsrc = DATA_BUFSIZE*2 - ndst; 270 ndst = DATA_BUFSIZE; 271 ssize_t nchars = utf16_to_utf32(cBufTail, &ndst, xBuffer, &nsrc, false); 272 if (nchars <= 0) 273 return nchars; 274 275 bBufHead = bhead; 276 cBufTail += DATA_BUFSIZE - ndst; 277 #else 278 char *xinbuf = reinterpret_cast<char *>(bBufHead); 279 char *xoutbuf = reinterpret_cast<char *>(cBufTail); 280 bufsz = DATA_BUFSIZE * sizeof(lsp_wchar_t); 281 282 // Perform conversion 283 size_t nconv = ::iconv(hIconv, &xinbuf, &xinleft, &xoutbuf, &bufsz); 284 if (nconv == size_t(-1)) 285 { 286 int code = errno; 287 switch (code) 288 { 289 case E2BIG: 290 case EINVAL: 291 break; 292 default: 293 return -STATUS_BAD_FORMAT; 294 } 295 } 296 297 bBufHead = reinterpret_cast<uint8_t *>(xinbuf); 298 cBufTail = reinterpret_cast<lsp_wchar_t *>(xoutbuf); 299 #endif 300 return cBufTail - cBufHead; 301 } 302 fetch()303 lsp_swchar_t CharsetDecoder::fetch() 304 { 305 if (bBuffer == NULL) 306 return -STATUS_CLOSED; 307 308 // Is there any data in character buffer 309 if (cBufTail > cBufHead) 310 return *(cBufHead++); 311 312 ssize_t nchars = decode_buffer(); 313 if (nchars > 0) 314 return *(cBufHead++); 315 return (nchars < 0) ? nchars : -STATUS_EOF; 316 } 317 fetch(lsp_wchar_t * outbuf,size_t count)318 ssize_t CharsetDecoder::fetch(lsp_wchar_t *outbuf, size_t count) 319 { 320 if (bBuffer == NULL) 321 return -STATUS_CLOSED; 322 else if (outbuf == NULL) 323 return -STATUS_BAD_ARGUMENTS; 324 325 // Compute the amount of data to read 326 size_t processed = 0; 327 328 // Perform read 329 while (processed < count) 330 { 331 // Perform decoding 332 ssize_t nchars = decode_buffer(); 333 if (nchars <= 0) 334 { 335 if (processed > 0) 336 break; 337 return nchars; 338 } 339 340 // Copy data to output buffer 341 ssize_t to_copy = count - processed; 342 if (nchars > to_copy) 343 nchars = to_copy; 344 ::memcpy(outbuf, cBufHead, nchars * sizeof(lsp_wchar_t)); 345 346 // Update state 347 cBufHead += nchars; 348 processed += nchars; 349 outbuf += nchars; 350 } 351 352 return processed; 353 } 354 fetch(LSPString * out,size_t count)355 ssize_t CharsetDecoder::fetch(LSPString *out, size_t count) 356 { 357 if (bBuffer == NULL) 358 return -STATUS_CLOSED; 359 else if (out == NULL) 360 return -STATUS_BAD_ARGUMENTS; 361 362 // Compute the amount of data to read 363 size_t processed = 0; 364 if (!count) 365 count = DATA_BUFSIZE*2; 366 367 // Perform read 368 while (processed < count) 369 { 370 // Perform decoding 371 ssize_t nchars = decode_buffer(); 372 if (nchars <= 0) 373 { 374 if (processed > 0) 375 break; 376 return nchars; 377 } 378 379 // Copy data to output buffer 380 ssize_t to_copy = count - processed; 381 if (nchars > to_copy) 382 nchars = to_copy; 383 if (!out->append(cBufHead, nchars)) 384 return -STATUS_NO_MEM; 385 386 // Update state 387 cBufHead += nchars; 388 processed += nchars; 389 } 390 391 return processed; 392 } 393 fetch(IOutSequence * out,size_t count)394 ssize_t CharsetDecoder::fetch(IOutSequence *out, size_t count) 395 { 396 if (bBuffer == NULL) 397 return -STATUS_CLOSED; 398 else if (out == NULL) 399 return -STATUS_BAD_ARGUMENTS; 400 401 // Compute the amount of data to read 402 size_t processed = 0; 403 if (!count) 404 count = DATA_BUFSIZE*2; 405 406 // Perform read 407 while (processed < count) 408 { 409 // Perform decoding 410 ssize_t nchars = decode_buffer(); 411 if (nchars <= 0) 412 { 413 if (processed > 0) 414 break; 415 return nchars; 416 } 417 418 // Write data to output sequence 419 ssize_t to_copy = count - processed; 420 if (nchars > to_copy) 421 nchars = to_copy; 422 nchars = out->write(cBufHead, nchars); 423 if (nchars < 0) 424 { 425 if (processed > 0) 426 break; 427 return nchars; 428 } 429 430 // Update state 431 cBufHead += nchars; 432 processed += nchars; 433 } 434 435 return processed; 436 } 437 fill(const void * buf,size_t count)438 ssize_t CharsetDecoder::fill(const void *buf, size_t count) 439 { 440 if (bBuffer == NULL) 441 return -STATUS_CLOSED; 442 else if (buf == NULL) 443 return -STATUS_BAD_ARGUMENTS; 444 445 size_t bufsz = prepare_buffer(); 446 if (bufsz <= 0) 447 return bufsz; 448 449 if (count > bufsz) 450 count = bufsz; 451 ::memcpy(&bBufTail, buf, count); 452 bBufTail += count; 453 return count; 454 } 455 fill(File * fd,size_t count)456 ssize_t CharsetDecoder::fill(File *fd, size_t count) 457 { 458 if (bBuffer == NULL) 459 return -STATUS_CLOSED; 460 else if (fd == NULL) 461 return -STATUS_BAD_ARGUMENTS; 462 463 // Is there a space in the buffer for reading? 464 size_t bufsz = prepare_buffer(); 465 if (bufsz <= 0) 466 return bufsz; 467 468 // Compute the amount of data to read 469 size_t read = 0; 470 if ((!count) || (count > bufsz)) 471 count = bufsz; 472 473 // Perform read 474 do 475 { 476 ssize_t nread = fd->read(bBufTail, count - read); 477 if (nread <= 0) 478 { 479 if (read > 0) // Ignore error if there is data on the input 480 break; 481 return nread; 482 } 483 484 bBufTail += nread; 485 read += nread; 486 } 487 while (read < count); 488 489 return read; 490 } 491 fill(IInStream * is,size_t count)492 ssize_t CharsetDecoder::fill(IInStream *is, size_t count) 493 { 494 if (bBuffer == NULL) 495 return -STATUS_CLOSED; 496 else if (is == NULL) 497 return -STATUS_BAD_ARGUMENTS; 498 499 // Is there a space in the buffer for reading? 500 size_t bufsz = prepare_buffer(); 501 if (bufsz <= 0) 502 return bufsz; 503 504 // Compute the amount of data to read 505 size_t read = 0; 506 if ((!count) || (count > bufsz)) 507 count = bufsz; 508 509 // Perform read 510 do 511 { 512 ssize_t nread = is->read(bBufTail, count - read); 513 if (nread <= 0) 514 { 515 if (read > 0) // Ignore error if there is data on the input 516 break; 517 return nread; 518 } 519 520 bBufTail += nread; 521 read += nread; 522 } 523 while (read < count); 524 525 return read; 526 } 527 } /* namespace io */ 528 } /* namespace lsp */ 529