1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
6  * Created on: 18 июн. 2018 г.
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef CORE_IO_CHARSET_H_
23 #define CORE_IO_CHARSET_H_
24 
25 #include <core/types.h>
26 
27 #if defined(PLATFORM_WINDOWS)
28     #include <winnls.h>
29 #else
30     #include <iconv.h>
31     #include <locale.h>
32 #endif /* PLATFORM_WINDOWS */
33 
/**
 * Sentinel code point: the streaming readers declared in this header document
 * it as their "end of sequence" return value. It is the value -1 converted
 * to lsp_utf32_t.
 */
#define LSP_UTF32_EOF       lsp_utf32_t(-1)
35 
36 namespace lsp
37 {
38 #if defined(PLATFORM_WINDOWS)
39 
    /**
     * NOTE(review): only the prototype is visible in this header; judging by the
     * name this resolves the code page associated with a Windows locale -
     * confirm against the implementation.
     * @param locale Windows locale identifier
     * @param ansi defaults to true; presumably selects the ANSI code page instead
     *        of the OEM one - TODO confirm
     * @return signed value; presumably the code page or a negative error code - TODO confirm
     */
    ssize_t get_codepage(LCID locale, bool ansi = true);

    /**
     * NOTE(review): only the prototype is visible in this header; judging by the
     * name this maps a character set name to a Windows code page - confirm
     * against the implementation.
     * @param charset character set name
     * @return signed value; presumably the code page or a negative error code - TODO confirm
     */
    ssize_t codepage_from_name(const char *charset);
43 
    /**
     * This is a MultiByteToWideChar wrapping function that handles additional cases
     * for non-MBCS encodings
     * @param cp code page
     * @param src source buffer
     * @param nsrc in/out: number of bytes available in source buffer (will be updated on exit)
     * @param dst destination buffer (can be NULL for just estimating the length)
     * @param ndst in/out: number of characters in destination buffer (will be updated on exit,
     *        can be NULL or point to zero value for just estimating the length)
     * @return number of UTF-16 characters written to destination buffer or negative error code
     */
    ssize_t multibyte_to_widechar(size_t cp, LPCCH src, size_t *nsrc, LPWSTR dst, size_t *ndst);
56 
    /**
     * This is a WideCharToMultiByte wrapping function that handles additional cases
     * for non-MBCS encodings
     * @param cp code page
     * @param src source wide string to perform encoding
     * @param nsrc in/out: number of characters available in source wide string (will be updated on exit)
     * @param dst target string to perform encoding (can be NULL for just estimating the length)
     * @param ndst in/out: number of bytes in destination buffer (will be updated on exit,
     *        can be NULL or point to zero value for just estimating the length)
     * @return number of bytes written to destination buffer or negative error code
     */
    ssize_t widechar_to_multibyte(size_t cp, LPCWCH src, size_t *nsrc, LPSTR dst, size_t *ndst);
69 #else
70 
    /**
     * NOTE(review): only the prototype is visible in this header; judging by the
     * name this creates an iconv conversion descriptor that converts from the
     * given character set to wchar_t - confirm against the implementation.
     * @param charset character set name
     * @return iconv conversion descriptor
     */
    iconv_t init_iconv_to_wchar_t(const char *charset);

    /**
     * NOTE(review): only the prototype is visible in this header; judging by the
     * name this creates an iconv conversion descriptor that converts from
     * wchar_t to the given character set - confirm against the implementation.
     * @param charset character set name
     * @return iconv conversion descriptor
     */
    iconv_t init_iconv_from_wchar_t(const char *charset);
74 
75 #endif /* PLATFORM_WINDOWS */
76 
77     /**
78      * Read UTF-16 codepoint from the NULL-terminated UTF-16 string, replace invalid
79      * code sequence by 0xfffd code point
80      * @param str pointer to the NULL-terminated UTF-16 string
81      * @return code point
82      */
83     lsp_utf32_t     read_utf16le_codepoint(const lsp_utf16_t **str);
84     lsp_utf32_t     read_utf16be_codepoint(const lsp_utf16_t **str);
read_utf16_codepoint(const lsp_utf16_t ** str)85     inline lsp_utf32_t  read_utf16_codepoint(const lsp_utf16_t **str) { return __IF_LEBE(read_utf16le_codepoint, read_utf16be_codepoint)(str); };
86 
87     /**
88      * Read UTF-16 codepoint from the NULL-terminated UTF-16 string in streaming mode,
89      * replace invalid code sequence by 0xfffd code point
90      * @param str pointer to the pointer to the NULL-terminated UTF-16 string
91      * @param nsrc counter containing number of unread array elements
92      * @param force process data as there will be no future data on the input
93      * @return parsed code point or LSP_UTF32_EOF as end of sequence
94      */
95     lsp_utf32_t         read_utf16le_streaming(const lsp_utf16_t **str, size_t *nsrc, bool force);
96     lsp_utf32_t         read_utf16be_streaming(const lsp_utf16_t **str, size_t *nsrc, bool force);
read_utf16_streaming(const lsp_utf16_t ** str,size_t * nsrc,bool force)97     inline lsp_utf32_t  read_utf16_streaming(const lsp_utf16_t **str, size_t *nsrc, bool force) { return __IF_LEBE(read_utf16le_streaming, read_utf16be_streaming)(str, nsrc, force); }
98 
    /**
     * Read a single code point from the NULL-terminated UTF-8 string, replace invalid
     * code sequence by 0xfffd code point
     * @param str pointer to the pointer to the NULL-terminated UTF-8 string
     * @return code point
     */
    lsp_utf32_t     read_utf8_codepoint(const char **str);
106 
    /**
     * Read a single code point from the NULL-terminated UTF-8 string in streaming mode,
     * replace invalid code sequence by 0xfffd code point
     * @param str pointer to the pointer to the NULL-terminated UTF-8 string
     * @param nsrc counter containing number of unread array elements
     * @param force process data as there will be no future data on the input
     * @return parsed code point or LSP_UTF32_EOF as end of sequence
     */
    lsp_utf32_t     read_utf8_streaming(const char **str, size_t *nsrc, bool force);
116 
    /**
     * Write UTF-8 code point to buffer, buffer should be of enough size
     * (no buffer size is passed to this function)
     * @param str pointer to the pointer to the target buffer
     * @param cp code point to write
     */
    void            write_utf8_codepoint(char **str, lsp_utf32_t cp);
123 
124     /**
125      * Write UTF-16 code point to buffer, buffer should be of enough size
126      * @param str pointer to target buffer
127      * @param cp code point to write
128      */
129     void            write_utf16le_codepoint(lsp_utf16_t **str, lsp_utf32_t cp);
130     void            write_utf16be_codepoint(lsp_utf16_t **str, lsp_utf32_t cp);
write_utf16_codepoint(lsp_utf16_t ** str,lsp_utf32_t cp)131     inline void     write_utf16_codepoint(lsp_utf16_t **str, lsp_utf32_t cp) { return __IF_LEBE(write_utf16le_codepoint, write_utf16be_codepoint)(str, cp); }
132 
133     /**
134      * Encode NULL-terminated UTF-8 string to NULL-terminated UTF-16 string
135      * @param str string to encode
136      * @return pointer to allocated UTF-16 string that should be free()'d after use
137      */
138     lsp_utf16_t    *utf8_to_utf16le(const char *str);
139     lsp_utf16_t    *utf8_to_utf16be(const char *str);
utf8_to_utf16(const char * str)140     inline lsp_utf16_t    *utf8_to_utf16(const char *str) { return __IF_LEBE(utf8_to_utf16le, utf8_to_utf16be)(str); }
141 
142     /**
143      * Encode sequence of UTF-8 characters into sequence of UTF-16 characters
144      * @param dst target buffer to store characters
145      * @param ndst number of elements available in target buffer
146      * @param src source buffer to read characters
147      * @param nsrc number of elements available in source buffer
148      * @param force force flag that treats the input block as last in the character sequence
149      * @return number of processed code points
150      */
151     size_t          utf8_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force);
152     size_t          utf8_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force);
utf8_to_utf16(lsp_utf16_t * dst,size_t * ndst,const char * src,size_t * nsrc,bool force)153     inline size_t   utf8_to_utf16(lsp_utf16_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force) { return __IF_LEBE(utf8_to_utf16le, utf8_to_utf16be)(dst, ndst, src, nsrc, force); }
154 
155     /**
156      * Encode NULL-terminated UTF-8 string to NULL-terminated UTF-32 string
157      * @param str string to encode
158      * @return pointer to allocated UTF-32 string that should be free()'d after use
159      */
160     lsp_utf32_t    *utf8_to_utf32le(const char *str);
161     lsp_utf32_t    *utf8_to_utf32be(const char *str);
utf8_to_utf32(const char * str)162     inline lsp_utf32_t    *utf8_to_utf32(const char *str) { return __IF_LEBE(utf8_to_utf32le, utf8_to_utf32be)(str); };
163 
164     /**
165      * Encode sequence of UTF-8 characters into sequence of UTF-32 characters
166      * @param dst target buffer to store characters
167      * @param ndst number of elements available in target buffer
168      * @param src source buffer to read characters
169      * @param nsrc number of elements available in source buffer
170      * @param force force flag that treats the input block as last in the character sequence
171      * @return number of processed code points
172      */
173     size_t          utf8_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force);
174     size_t          utf8_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force);
utf8_to_utf32(lsp_utf32_t * dst,size_t * ndst,const char * src,size_t * nsrc,bool force)175     inline size_t   utf8_to_utf32(lsp_utf32_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force) { return __IF_LEBE(utf8_to_utf32le, utf8_to_utf32be)(dst, ndst, src, nsrc, force); };
176 
177     /**
178      * Encode NULL-terminated UTF-16 string to NULL-terminated UTF-8 string
179      * @param str string to encode
180      * @return pointer to allocated UTF-8 string that should be free()'d after use
181      */
182     char           *utf16le_to_utf8(const lsp_utf16_t *str);
183     char           *utf16be_to_utf8(const lsp_utf16_t *str);
utf16_to_utf8(const lsp_utf16_t * str)184     inline char    *utf16_to_utf8(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf8, utf16be_to_utf8)(str); };
185 
186     /**
187      * Encode sequence of UTF-16 characters into sequence of UTF-16 characters
188      * @param dst target buffer to store characters
189      * @param ndst number of elements available in target buffer
190      * @param src source buffer to read characters
191      * @param nsrc number of elements available in source buffer
192      * @param force force flag that treats the input block as last in the character sequence
193      * @return number of processed code points
194      */
195     size_t          utf16le_to_utf8(char *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force);
196     size_t          utf16be_to_utf8(char *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force);
utf16_to_utf8(char * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)197     inline size_t   utf16_to_utf8(char *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf8, utf16be_to_utf8)(dst, ndst, src, nsrc, force); };
198 
199     /**
200      * Encode NULL-terminated UTF-16 string to NULL-terminated UTF-32 string
201      * @param str string to encode
202      * @return pointer to allocated UTF-32 string that should be free()'d after use
203      */
204     lsp_utf32_t    *utf16le_to_utf32le(const lsp_utf16_t *str);
205     lsp_utf32_t    *utf16le_to_utf32be(const lsp_utf16_t *str);
206     lsp_utf32_t    *utf16be_to_utf32le(const lsp_utf16_t *str);
207     lsp_utf32_t    *utf16be_to_utf32be(const lsp_utf16_t *str);
208 
utf16le_to_utf32(const lsp_utf16_t * str)209     inline lsp_utf32_t    *utf16le_to_utf32(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf32le, utf16le_to_utf32be)(str); }
utf16be_to_utf32(const lsp_utf16_t * str)210     inline lsp_utf32_t    *utf16be_to_utf32(const lsp_utf16_t *str) { return __IF_LEBE(utf16be_to_utf32le, utf16be_to_utf32be)(str); }
utf16_to_utf32le(const lsp_utf16_t * str)211     inline lsp_utf32_t    *utf16_to_utf32le(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf32le, utf16be_to_utf32le)(str); }
utf16_to_utf32be(const lsp_utf16_t * str)212     inline lsp_utf32_t    *utf16_to_utf32be(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf32be, utf16be_to_utf32be)(str); }
utf16_to_utf32(const lsp_utf16_t * str)213     inline lsp_utf32_t    *utf16_to_utf32(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf32le, utf16be_to_utf32be)(str); }
214 
215     /**
216      * Encode sequence of UTF-16 characters into sequence of UTF-32 characters
217      * @param dst target buffer to store characters
218      * @param ndst number of elements available in target buffer
219      * @param src source buffer to read characters
220      * @param nsrc number of elements available in source buffer
221      * @param force force flag that treats the input block as last in the character sequence
222      * @return number of processed code points
223      */
224     size_t          utf16le_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force);
225     size_t          utf16le_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force);
226     size_t          utf16be_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force);
227     size_t          utf16be_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force);
228 
utf16le_to_utf32(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)229     inline size_t  utf16le_to_utf32(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf32le, utf16le_to_utf32be)(dst, ndst, src, nsrc, force); }
utf16be_to_utf32(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)230     inline size_t  utf16be_to_utf32(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16be_to_utf32le, utf16be_to_utf32be)(dst, ndst, src, nsrc, force); }
utf16_to_utf32le(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)231     inline size_t  utf16_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf32le, utf16be_to_utf32le)(dst, ndst, src, nsrc, force); }
utf16_to_utf32be(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)232     inline size_t  utf16_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf32be, utf16be_to_utf32be)(dst, ndst, src, nsrc, force); }
utf16_to_utf32(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)233     inline size_t  utf16_to_utf32(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf32le, utf16be_to_utf32be)(dst, ndst, src, nsrc, force); }
234 
235     /**
236      * Encode NULL-terminated UTF-32 string to NULL-terminated UTF-8 string
237      * @param str string to encode
238      * @return pointer to allocated UTF-16 string that should be free()'d after use
239      */
240     char           *utf32le_to_utf8(const lsp_utf32_t *str);
241     char           *utf32be_to_utf8(const lsp_utf32_t *str);
utf32_to_utf8(const lsp_utf32_t * str)242     inline char    *utf32_to_utf8(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf8, utf32be_to_utf8)(str); };
243 
244     /**
245      * Encode sequence of UTF-8 characters into sequence of UTF-16 characters
246      * @param dst target buffer to store characters
247      * @param ndst number of elements available in target buffer
248      * @param src source buffer to read characters
249      * @param nsrc number of elements available in source buffer
250      * @param force force flag that treats the input block as last in the character sequence
251      * @return number of processed code points
252      */
253     size_t          utf32le_to_utf8(char *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force);
254     size_t          utf32be_to_utf8(char *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force);
utf32_to_utf8(char * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)255     inline size_t   utf32_to_utf8(char *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf8, utf32be_to_utf8)(dst, ndst, src, nsrc, force); }
256 
257     /**
258      * Encode NULL-terminated UTF-32 string to NULL-terminated UTF-16 string
259      * @param str string to encode
260      * @return pointer to allocated UTF-16 string that should be free()'d after use
261      */
262     lsp_utf16_t    *utf32le_to_utf16le(const lsp_utf32_t *str);
263     lsp_utf16_t    *utf32le_to_utf16be(const lsp_utf32_t *str);
264     lsp_utf16_t    *utf32be_to_utf16le(const lsp_utf32_t *str);
265     lsp_utf16_t    *utf32be_to_utf16be(const lsp_utf32_t *str);
266 
utf32le_to_utf16(const lsp_utf32_t * str)267     inline lsp_utf16_t *utf32le_to_utf16(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf16le, utf32le_to_utf16be)(str);  }
utf32be_to_utf16(const lsp_utf32_t * str)268     inline lsp_utf16_t *utf32be_to_utf16(const lsp_utf32_t *str) { return __IF_LEBE(utf32be_to_utf16le, utf32be_to_utf16be)(str);  }
utf32_to_utf16le(const lsp_utf32_t * str)269     inline lsp_utf16_t *utf32_to_utf16le(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf16le, utf32be_to_utf16le)(str);  }
utf32_to_utf16be(const lsp_utf32_t * str)270     inline lsp_utf16_t *utf32_to_utf16be(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf16be, utf32be_to_utf16be)(str);  }
utf32_to_utf16(const lsp_utf32_t * str)271     inline lsp_utf16_t *utf32_to_utf16(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf16le, utf32be_to_utf16be)(str);  }
272 
273     /**
274      * Encode sequence of UTF-8 characters into sequence of UTF-16 characters
275      * @param dst target buffer to store characters
276      * @param ndst number of elements available in target buffer
277      * @param src source buffer to read characters
278      * @param nsrc number of elements available in source buffer
279      * @param force force flag that treats the input block as last in the character sequence
280      * @return number of processed code points
281      */
282     size_t          utf32le_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force);
283     size_t          utf32le_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force);
284     size_t          utf32be_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force);
285     size_t          utf32be_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force);
286 
utf32_to_utf16le(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)287     inline size_t   utf32_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf16le, utf32be_to_utf16le)(dst, ndst, src, nsrc, force); }
utf32_to_utf16be(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)288     inline size_t   utf32_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf16be, utf32be_to_utf16be)(dst, ndst, src, nsrc, force); }
utf32le_to_utf16(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)289     inline size_t   utf32le_to_utf16(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf16le, utf32le_to_utf16be)(dst, ndst, src, nsrc, force); }
utf32be_to_utf16(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)290     inline size_t   utf32be_to_utf16(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32be_to_utf16le, utf32be_to_utf16be)(dst, ndst, src, nsrc, force); }
utf32_to_utf16(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)291     inline size_t   utf32_to_utf16(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force)   { return __IF_LEBE(utf32le_to_utf16le, utf32be_to_utf16be)(dst, ndst, src, nsrc, force); }
292 
293 }
294 
295 #endif /* CORE_IO_CHARSET_H_ */
296