1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
6  * Created on: 8 мар. 2019 г.
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <core/io/CharsetDecoder.h>
23 #include <errno.h>
24 
25 #define DATA_BUFSIZE   0x1000
26 
27 namespace lsp
28 {
29     namespace io
30     {
31 
CharsetDecoder()32         CharsetDecoder::CharsetDecoder()
33         {
34             bBuffer         = NULL;
35             bBufHead        = NULL;
36             bBufTail        = NULL;
37             cBuffer         = NULL;
38             cBufHead        = NULL;
39             cBufTail        = NULL;
40 
41 #if defined(PLATFORM_WINDOWS)
42             xBuffer         = NULL;
43             nCodePage       = UINT(-1);
44 #else
45             hIconv          = iconv_t(-1);
46 #endif /* PLATFORM_WINDOWS */
47         }
48 
~CharsetDecoder()49         CharsetDecoder::~CharsetDecoder()
50         {
51             close();
52         }
53 
init(const char * charset)54         status_t CharsetDecoder::init(const char *charset)
55         {
56 #if defined(PLATFORM_WINDOWS)
57             if (nCodePage != UINT(-1))
58                 return STATUS_BAD_STATE;
59 
60             ssize_t cp  = codepage_from_name(charset);
61             if (cp < 0)
62                 return STATUS_BAD_LOCALE;
63             nCodePage       = cp;
64 #else
65             if (hIconv != iconv_t(-1))
66                 return STATUS_BAD_STATE;
67 
68             iconv_t handle = init_iconv_to_wchar_t(charset);
69             if (handle == iconv_t(-1))
70                 return STATUS_BAD_LOCALE;
71             hIconv      = handle;
72 #endif /* PLATFORM_WINDOWS */
73 
74             // Allocate buffer
75             uint8_t *buf= reinterpret_cast<uint8_t *>(::malloc(
76                         DATA_BUFSIZE    // The byte buffer size
77                         + sizeof(lsp_wchar_t) * DATA_BUFSIZE * 2 // The temporary buffer size
78 #if defined(PLATFORM_WINDOWS)
79                         + sizeof(lsp_utf16_t) * DATA_BUFSIZE * 2
80 #endif /* PLATFORM_WINDOWS */
81                     ));
82             if (buf == NULL)
83             {
84                 close();
85                 return STATUS_NO_MEM;
86             }
87 
88             bBuffer         = buf;
89             bBufHead        = bBuffer;
90             bBufTail        = bBuffer;
91             buf            += DATA_BUFSIZE;
92             cBuffer         = reinterpret_cast<lsp_wchar_t *>(buf);
93             cBufHead        = cBuffer;
94             cBufTail        = cBuffer;
95 
96 #if defined(PLATFORM_WINDOWS)
97             buf            += sizeof(lsp_wchar_t) * DATA_BUFSIZE * 2;
98             xBuffer         = reinterpret_cast<lsp_utf16_t *>(buf);
99 #endif /* PLATFORM_WINDOWS */
100 
101             return STATUS_OK;
102         }
103 
close()104         void CharsetDecoder::close()
105         {
106             if (bBuffer != NULL)
107             {
108                 free(bBuffer);
109 
110                 bBuffer         = NULL;
111                 bBufHead        = NULL;
112                 bBufTail        = NULL;
113                 cBuffer         = NULL;
114                 cBufHead        = NULL;
115                 cBufTail        = NULL;
116             }
117 
118 #ifdef PLATFORM_WINDOWS
119             xBuffer     = NULL;
120             nCodePage   = UINT(-1);
121 #else
122             if (hIconv != iconv_t(-1))
123             {
124                 ::iconv_close(hIconv);
125                 hIconv      = iconv_t(-1);
126             }
127 #endif /* PLATFORM_WINDOWS */
128         }
#if 0
        // NOTE: this whole definition is excluded from compilation by the
        // enclosing '#if 0'. It converts data from a caller-supplied input
        // buffer into a caller-supplied output buffer and advances both
        // pointer/remaining-count pairs, in the manner of iconv().
        //
        // Returns the value accumulated in nconv, or a non-positive/negative
        // status code when nothing could be converted.
        ssize_t CharsetDecoder::decode(lsp_wchar_t **outbuf, size_t *outleft, void **inbuf, size_t *inleft)
        {
            size_t nconv;

#if defined(PLATFORM_WINDOWS)
            // Work on local copies, the caller's values are updated at the end
            CHAR *xinbuf        = reinterpret_cast<CHAR *>(*inbuf);
            lsp_wchar_t *xoutbuf= *outbuf;
            size_t xinleft      = *inleft;
            size_t xoutleft     = *outleft;
            nconv               = 0;

            while (xoutleft > 0)
            {
                // Is there any data in the character buffer?
                size_t nbuf     = cBufTail - cBufHead;
                if (nbuf > 0)
                {
                    // Drain the buffered data into the output first
                    size_t nsrc = nbuf;
                    nbuf        = utf16_to_utf32(xoutbuf, &xoutleft, cBufHead, &nsrc, false);
                    if (nbuf <= 0)
                        break;

                    nconv          += nbuf;
                    xoutbuf        += nbuf;
                    cBufHead       += nbuf;
                    continue;
                }

                // Fill the rest of the space with converted UTF-16 data.
                // Each input character can take up to 2 UTF-16 characters, so prevent buffer overflows.
                // We can only limit the input buffer size because otherwise we will
                // get a conversion error from the dumb MultiByteToWideChar routine.
                // The character buffer is guaranteed to be empty at this point.
                size_t xinamount    = (xinleft > DATA_BUFSIZE) ? DATA_BUFSIZE : xinleft;
                size_t bufcw        = DATA_BUFSIZE*2;
                size_t xincw        = xinamount;

                ssize_t nchars      = multibyte_to_widechar(nCodePage, xinbuf, &xincw, cBuffer, &bufcw);
                if (nchars <= 0)
                {
                    // Report the failure only if nothing has been converted so far
                    if (nconv <= 0)
                        return nchars;
                    break;
                }

                // Update pointers and data
                // NOTE(review): xincw appears to hold the number of unconsumed input bytes
                // after the call - confirm against multibyte_to_widechar()
                xinamount      -= xincw;
                cBufHead        = cBuffer;
                cBufTail        = &cBuffer[nchars];
                xinbuf         += xinamount;
                xinleft        -= xinamount;
            }

            // Update pointers and values
            *outbuf             = xoutbuf;
            *outleft            = xoutleft;
            *inbuf              = reinterpret_cast<void *>(xinbuf);
            *inleft             = xinleft;
#else
            // iconv() counts the output space in bytes, the caller counts it in characters
            char *xinbuf        = reinterpret_cast<char *>(*inbuf);
            char *xoutbuf       = reinterpret_cast<char *>(*outbuf);
            size_t xinleft      = *inleft;
            size_t xoutleft     = *outleft * sizeof(lsp_wchar_t);

            // Perform conversion
            nconv               = ::iconv(hIconv, &xinbuf, &xinleft, &xoutbuf, &xoutleft);
            if (nconv == size_t(-1))
            {
                int code = errno;
                switch (code)
                {
                    // Per iconv(3): E2BIG - no more room in the output buffer,
                    // EINVAL - incomplete multibyte sequence at the end of the input.
                    // Both are treated as a partial conversion: report the characters written.
                    case E2BIG:
                    case EINVAL:
                        nconv   = *outleft - (xoutleft/sizeof(lsp_wchar_t));
                        break;
                    // Any other error code is reported as a malformed input
                    default:
                        return -STATUS_BAD_FORMAT;
                }
            }

            // Update pointers and values
            *outbuf             = reinterpret_cast<lsp_wchar_t *>(xoutbuf);
            *outleft            = xoutleft / sizeof(lsp_wchar_t);
            *inbuf              = reinterpret_cast<void *>(xinbuf);
            *inleft             = xinleft;
#endif /* PLATFORM_WINDOWS */

            return nconv;
        }
#endif
220 
prepare_buffer()221         size_t CharsetDecoder::prepare_buffer()
222         {
223             size_t bufsz = bBufTail - bBufHead;
224             if (bufsz > (DATA_BUFSIZE >> 1))
225                 return 0;
226             else if (bBufHead != bBuffer)
227             {
228                 if (bufsz > 0)
229                     ::memmove(bBuffer, bBufHead, bufsz);
230 
231                 bBufHead    = bBuffer;
232                 bBufTail    = &bBuffer[bufsz];
233             }
234             return DATA_BUFSIZE - bufsz;
235         }
236 
decode_buffer()237         ssize_t CharsetDecoder::decode_buffer()
238         {
239             // Prepare buffer
240             size_t bufsz = cBufTail - cBufHead;
241             if (bufsz > DATA_BUFSIZE)
242                 return bufsz;
243             else if (cBufHead != cBuffer)
244             {
245                 if (bufsz > 0)
246                     ::memmove(cBuffer, cBufHead, bufsz * sizeof(lsp_wchar_t));
247 
248                 cBufHead    = cBuffer;
249                 cBufTail    = &cBuffer[bufsz];
250             }
251 
252             // Is there any data in byte buffer?
253             size_t xinleft      = bBufTail - bBufHead;
254             if (!xinleft)
255                 return bufsz;
256 
257             // Now we can surely decode DATA_BUFSIZE characters
258 #ifdef PLATFORM_WINDOWS
259             // Round 1: Perform native -> UTF-16 decoding
260             CHAR *xinbuf        = reinterpret_cast<CHAR *>(bBufHead);
261             size_t nsrc         = xinleft;
262             size_t ndst         = DATA_BUFSIZE*2;
263             ssize_t nbytes      = multibyte_to_widechar(nCodePage, xinbuf, &nsrc, xBuffer, &ndst);
264             if (nbytes <= 0)
265                 return nbytes;
266             uint8_t *bhead      = &bBufHead[xinleft - nsrc];
267 
268             // Round 2: Perform UTF-16 -> UTF-32 decoding
269             nsrc                = DATA_BUFSIZE*2 - ndst;
270             ndst                = DATA_BUFSIZE;
271             ssize_t nchars      = utf16_to_utf32(cBufTail, &ndst, xBuffer, &nsrc, false);
272             if (nchars <= 0)
273                 return nchars;
274 
275             bBufHead            = bhead;
276             cBufTail           += DATA_BUFSIZE - ndst;
277 #else
278             char *xinbuf        = reinterpret_cast<char *>(bBufHead);
279             char *xoutbuf       = reinterpret_cast<char *>(cBufTail);
280             bufsz               = DATA_BUFSIZE * sizeof(lsp_wchar_t);
281 
282             // Perform conversion
283             size_t nconv        = ::iconv(hIconv, &xinbuf, &xinleft, &xoutbuf, &bufsz);
284             if (nconv == size_t(-1))
285             {
286                 int code = errno;
287                 switch (code)
288                 {
289                     case E2BIG:
290                     case EINVAL:
291                         break;
292                     default:
293                         return -STATUS_BAD_FORMAT;
294                 }
295             }
296 
297             bBufHead            = reinterpret_cast<uint8_t *>(xinbuf);
298             cBufTail            = reinterpret_cast<lsp_wchar_t *>(xoutbuf);
299 #endif
300             return cBufTail - cBufHead;
301         }
302 
fetch()303         lsp_swchar_t CharsetDecoder::fetch()
304         {
305             if (bBuffer == NULL)
306                 return -STATUS_CLOSED;
307 
308             // Is there any data in character buffer
309             if (cBufTail > cBufHead)
310                 return *(cBufHead++);
311 
312             ssize_t nchars   = decode_buffer();
313             if (nchars > 0)
314                 return *(cBufHead++);
315             return (nchars < 0) ? nchars : -STATUS_EOF;
316         }
317 
fetch(lsp_wchar_t * outbuf,size_t count)318         ssize_t CharsetDecoder::fetch(lsp_wchar_t *outbuf, size_t count)
319         {
320             if (bBuffer == NULL)
321                 return -STATUS_CLOSED;
322             else if (outbuf == NULL)
323                 return -STATUS_BAD_ARGUMENTS;
324 
325             // Compute the amount of data to read
326             size_t processed = 0;
327 
328             // Perform read
329             while (processed < count)
330             {
331                 // Perform decoding
332                 ssize_t nchars   = decode_buffer();
333                 if (nchars <= 0)
334                 {
335                     if (processed > 0)
336                         break;
337                     return nchars;
338                 }
339 
340                 // Copy data to output buffer
341                 ssize_t to_copy = count - processed;
342                 if (nchars > to_copy)
343                     nchars          = to_copy;
344                 ::memcpy(outbuf, cBufHead, nchars * sizeof(lsp_wchar_t));
345 
346                 // Update state
347                 cBufHead       += nchars;
348                 processed      += nchars;
349                 outbuf         += nchars;
350             }
351 
352             return processed;
353         }
354 
fetch(LSPString * out,size_t count)355         ssize_t CharsetDecoder::fetch(LSPString *out, size_t count)
356         {
357             if (bBuffer == NULL)
358                 return -STATUS_CLOSED;
359             else if (out == NULL)
360                 return -STATUS_BAD_ARGUMENTS;
361 
362             // Compute the amount of data to read
363             size_t processed = 0;
364             if (!count)
365                 count   = DATA_BUFSIZE*2;
366 
367             // Perform read
368             while (processed < count)
369             {
370                 // Perform decoding
371                 ssize_t nchars   = decode_buffer();
372                 if (nchars <= 0)
373                 {
374                     if (processed > 0)
375                         break;
376                     return nchars;
377                 }
378 
379                 // Copy data to output buffer
380                 ssize_t to_copy = count - processed;
381                 if (nchars > to_copy)
382                     nchars          = to_copy;
383                 if (!out->append(cBufHead, nchars))
384                     return -STATUS_NO_MEM;
385 
386                 // Update state
387                 cBufHead       += nchars;
388                 processed      += nchars;
389             }
390 
391             return processed;
392         }
393 
fetch(IOutSequence * out,size_t count)394         ssize_t CharsetDecoder::fetch(IOutSequence *out, size_t count)
395         {
396             if (bBuffer == NULL)
397                 return -STATUS_CLOSED;
398             else if (out == NULL)
399                 return -STATUS_BAD_ARGUMENTS;
400 
401             // Compute the amount of data to read
402             size_t processed = 0;
403             if (!count)
404                 count   = DATA_BUFSIZE*2;
405 
406             // Perform read
407             while (processed < count)
408             {
409                 // Perform decoding
410                 ssize_t nchars   = decode_buffer();
411                 if (nchars <= 0)
412                 {
413                     if (processed > 0)
414                         break;
415                     return nchars;
416                 }
417 
418                 // Write data to output sequence
419                 ssize_t to_copy = count - processed;
420                 if (nchars > to_copy)
421                     nchars          = to_copy;
422                 nchars = out->write(cBufHead, nchars);
423                 if (nchars < 0)
424                 {
425                     if (processed > 0)
426                         break;
427                     return nchars;
428                 }
429 
430                 // Update state
431                 cBufHead       += nchars;
432                 processed      += nchars;
433             }
434 
435             return processed;
436         }
437 
fill(const void * buf,size_t count)438         ssize_t CharsetDecoder::fill(const void *buf, size_t count)
439         {
440             if (bBuffer == NULL)
441                 return -STATUS_CLOSED;
442             else if (buf == NULL)
443                 return -STATUS_BAD_ARGUMENTS;
444 
445             size_t bufsz = prepare_buffer();
446             if (bufsz <= 0)
447                 return bufsz;
448 
449             if (count > bufsz)
450                 count   = bufsz;
451             ::memcpy(&bBufTail, buf, count);
452             bBufTail       += count;
453             return count;
454         }
455 
fill(File * fd,size_t count)456         ssize_t CharsetDecoder::fill(File *fd, size_t count)
457         {
458             if (bBuffer == NULL)
459                 return -STATUS_CLOSED;
460             else if (fd == NULL)
461                 return -STATUS_BAD_ARGUMENTS;
462 
463             // Is there a space in the buffer for reading?
464             size_t bufsz = prepare_buffer();
465             if (bufsz <= 0)
466                 return bufsz;
467 
468             // Compute the amount of data to read
469             size_t read = 0;
470             if ((!count) || (count > bufsz))
471                 count   = bufsz;
472 
473             // Perform read
474             do
475             {
476                 ssize_t nread   = fd->read(bBufTail, count - read);
477                 if (nread <= 0)
478                 {
479                     if (read > 0) // Ignore error if there is data on the input
480                         break;
481                     return nread;
482                 }
483 
484                 bBufTail       += nread;
485                 read           += nread;
486             }
487             while (read < count);
488 
489             return read;
490         }
491 
fill(IInStream * is,size_t count)492         ssize_t CharsetDecoder::fill(IInStream *is, size_t count)
493         {
494             if (bBuffer == NULL)
495                 return -STATUS_CLOSED;
496             else if (is == NULL)
497                 return -STATUS_BAD_ARGUMENTS;
498 
499             // Is there a space in the buffer for reading?
500             size_t bufsz = prepare_buffer();
501             if (bufsz <= 0)
502                 return bufsz;
503 
504             // Compute the amount of data to read
505             size_t read = 0;
506             if ((!count) || (count > bufsz))
507                 count   = bufsz;
508 
509             // Perform read
510             do
511             {
512                 ssize_t nread   = is->read(bBufTail, count - read);
513                 if (nread <= 0)
514                 {
515                     if (read > 0) // Ignore error if there is data on the input
516                         break;
517                     return nread;
518                 }
519 
520                 bBufTail       += nread;
521                 read           += nread;
522             }
523             while (read < count);
524 
525             return read;
526         }
527     } /* namespace io */
528 } /* namespace lsp */
529