1 /* 2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> 3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com> 4 * 5 * This file is part of lsp-plugins 6 * Created on: 18 июн. 2018 г. 7 * 8 * lsp-plugins is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * any later version. 12 * 13 * lsp-plugins is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>. 20 */ 21 22 #ifndef CORE_IO_CHARSET_H_ 23 #define CORE_IO_CHARSET_H_ 24 25 #include <core/types.h> 26 27 #if defined(PLATFORM_WINDOWS) 28 #include <winnls.h> 29 #else 30 #include <iconv.h> 31 #include <locale.h> 32 #endif /* PLATFORM_WINDOWS */ 33 34 #define LSP_UTF32_EOF lsp_utf32_t(-1) 35 36 namespace lsp 37 { 38 #if defined(PLATFORM_WINDOWS) 39 40 ssize_t get_codepage(LCID locale, bool ansi = true); 41 42 ssize_t codepage_from_name(const char *charset); 43 44 /** 45 * This is a MultiByteToWideChar wrapping function that handles additional cases 46 * for non-MBCS encodings 47 * @param cp code page 48 * @param src source buffer 49 * @param nsrc number of bytes available in source buffer (will be updated on exit) 50 * @param dst destination buffer (can be NULL for just estimating the length) 51 * @param ndst number of characters in destination buffer (will be updated on exit, 52 * can be NULL or point to zero value for just estimating the length) 53 * @return number of UTF-16 characters written to destination buffer or negative error code 54 */ 55 ssize_t multibyte_to_widechar(size_t cp, LPCCH src, size_t *nsrc, LPWSTR dst, size_t *ndst); 56 57 /** 58 * This is a WideCharToMultiByte wrapping function that handles additional cases 59 * for non-MBCS encodings 60 * @param cp code page 61 * @param src source wide string to perform encoding 62 * @param nsrc number of characters available in source wide string (will be updated on exit) 63 * @param dst target string to perform encoding (can be NULL for just estimating the length) 64 * @param ndst number of bytes in destination buffer (will be updated on exit, 65 * can be NULL or point to zero value for just estimating the length) 66 * @return number of bytes written to destination buffer or negative error code 67 */ 68 ssize_t widechar_to_multibyte(size_t cp, LPCWCH src, size_t *nsrc, LPSTR dst, size_t *ndst); 69 #else 70 71 iconv_t init_iconv_to_wchar_t(const char *charset); 72 73 iconv_t init_iconv_from_wchar_t(const char *charset); 74 75 #endif /* PLATFORM_WINDOWS */ 76 77 /** 78 * Read UTF-16 codepoint from the NULL-terminated UTF-16 string, replace invalid 79 * code sequence by 0xfffd code point 80 * @param str pointer to the NULL-terminated UTF-16 string 81 * @return code point 82 */ 83 lsp_utf32_t read_utf16le_codepoint(const lsp_utf16_t **str); 84 lsp_utf32_t read_utf16be_codepoint(const lsp_utf16_t **str); read_utf16_codepoint(const lsp_utf16_t ** str)85 inline lsp_utf32_t read_utf16_codepoint(const lsp_utf16_t **str) { return __IF_LEBE(read_utf16le_codepoint, read_utf16be_codepoint)(str); }; 86 87 /** 88 * Read UTF-16 codepoint from the NULL-terminated UTF-16 string in streaming mode, 89 * replace invalid code sequence by 0xfffd code point 90 * @param str pointer to the pointer to the NULL-terminated UTF-16 string 91 * @param nsrc counter containing number of unread array elements 92 * @param force process data as there will be no future data on the input 93 * @return parsed code point or LSP_UTF32_EOF as end of sequence 94 */ 95 lsp_utf32_t read_utf16le_streaming(const lsp_utf16_t **str, size_t *nsrc, bool force); 96 lsp_utf32_t read_utf16be_streaming(const lsp_utf16_t **str, size_t *nsrc, bool force); read_utf16_streaming(const lsp_utf16_t ** str,size_t * nsrc,bool force)97 inline lsp_utf32_t read_utf16_streaming(const lsp_utf16_t **str, size_t *nsrc, bool force) { return __IF_LEBE(read_utf16le_streaming, read_utf16be_streaming)(str, nsrc, force); } 98 99 /** 100 * Read UTF-16 codepoint from the NULL-terminated UTF-8 string, replace invalid 101 * code sequence by 0xfffd code point 102 * @param str pointer to the NULL-terminated UTF-8 string 103 * @return code point 104 */ 105 lsp_utf32_t read_utf8_codepoint(const char **str); 106 107 /** 108 * Read UTF-8 codepoint from the NULL-terminated UTF-8 string in streaming mode, 109 * replace invalid code sequence by 0xfffd code point 110 * @param str pointer to the pointer to the NULL-terminated UTF-8 string 111 * @param nsrc counter containing number of unread array elements 112 * @param force process data as there will be no future data on the input 113 * @return parsed code point or LSP_UTF32_EOF as end of sequence 114 */ 115 lsp_utf32_t read_utf8_streaming(const char **str, size_t *nsrc, bool force); 116 117 /** 118 * Write UTF-8 code point to buffer, buffer should be of enough size 119 * @param str pointer to target buffer 120 * @param cp code point to write 121 */ 122 void write_utf8_codepoint(char **str, lsp_utf32_t cp); 123 124 /** 125 * Write UTF-16 code point to buffer, buffer should be of enough size 126 * @param str pointer to target buffer 127 * @param cp code point to write 128 */ 129 void write_utf16le_codepoint(lsp_utf16_t **str, lsp_utf32_t cp); 130 void write_utf16be_codepoint(lsp_utf16_t **str, lsp_utf32_t cp); write_utf16_codepoint(lsp_utf16_t ** str,lsp_utf32_t cp)131 inline void write_utf16_codepoint(lsp_utf16_t **str, lsp_utf32_t cp) { return __IF_LEBE(write_utf16le_codepoint, write_utf16be_codepoint)(str, cp); } 132 133 /** 134 * Encode NULL-terminated UTF-8 string to NULL-terminated UTF-16 string 135 * @param str string to encode 136 * @return pointer to allocated UTF-16 string that should be free()'d after use 137 */ 138 lsp_utf16_t *utf8_to_utf16le(const char *str); 139 lsp_utf16_t *utf8_to_utf16be(const char *str); utf8_to_utf16(const char * str)140 inline lsp_utf16_t *utf8_to_utf16(const char *str) { return __IF_LEBE(utf8_to_utf16le, utf8_to_utf16be)(str); } 141 142 /** 143 * Encode sequence of UTF-8 characters into sequence of UTF-16 characters 144 * @param dst target buffer to store characters 145 * @param ndst number of elements available in target buffer 146 * @param src source buffer to read characters 147 * @param nsrc number of elements available in source buffer 148 * @param force force flag that treats the input block as last in the character sequence 149 * @return number of processed code points 150 */ 151 size_t utf8_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force); 152 size_t utf8_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force); utf8_to_utf16(lsp_utf16_t * dst,size_t * ndst,const char * src,size_t * nsrc,bool force)153 inline size_t utf8_to_utf16(lsp_utf16_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force) { return __IF_LEBE(utf8_to_utf16le, utf8_to_utf16be)(dst, ndst, src, nsrc, force); } 154 155 /** 156 * Encode NULL-terminated UTF-8 string to NULL-terminated UTF-32 string 157 * @param str string to encode 158 * @return pointer to allocated UTF-32 string that should be free()'d after use 159 */ 160 lsp_utf32_t *utf8_to_utf32le(const char *str); 161 lsp_utf32_t *utf8_to_utf32be(const char *str); utf8_to_utf32(const char * str)162 inline lsp_utf32_t *utf8_to_utf32(const char *str) { return __IF_LEBE(utf8_to_utf32le, utf8_to_utf32be)(str); }; 163 164 /** 165 * Encode sequence of UTF-8 characters into sequence of UTF-32 characters 166 * @param dst target buffer to store characters 167 * @param ndst number of elements available in target buffer 168 * @param src source buffer to read characters 169 * @param nsrc number of elements available in source buffer 170 * @param force force flag that treats the input block as last in the character sequence 171 * @return number of processed code points 172 */ 173 size_t utf8_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force); 174 size_t utf8_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force); utf8_to_utf32(lsp_utf32_t * dst,size_t * ndst,const char * src,size_t * nsrc,bool force)175 inline size_t utf8_to_utf32(lsp_utf32_t *dst, size_t *ndst, const char *src, size_t *nsrc, bool force) { return __IF_LEBE(utf8_to_utf32le, utf8_to_utf32be)(dst, ndst, src, nsrc, force); }; 176 177 /** 178 * Encode NULL-terminated UTF-16 string to NULL-terminated UTF-8 string 179 * @param str string to encode 180 * @return pointer to allocated UTF-8 string that should be free()'d after use 181 */ 182 char *utf16le_to_utf8(const lsp_utf16_t *str); 183 char *utf16be_to_utf8(const lsp_utf16_t *str); utf16_to_utf8(const lsp_utf16_t * str)184 inline char *utf16_to_utf8(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf8, utf16be_to_utf8)(str); }; 185 186 /** 187 * Encode sequence of UTF-16 characters into sequence of UTF-16 characters 188 * @param dst target buffer to store characters 189 * @param ndst number of elements available in target buffer 190 * @param src source buffer to read characters 191 * @param nsrc number of elements available in source buffer 192 * @param force force flag that treats the input block as last in the character sequence 193 * @return number of processed code points 194 */ 195 size_t utf16le_to_utf8(char *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force); 196 size_t utf16be_to_utf8(char *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force); utf16_to_utf8(char * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)197 inline size_t utf16_to_utf8(char *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf8, utf16be_to_utf8)(dst, ndst, src, nsrc, force); }; 198 199 /** 200 * Encode NULL-terminated UTF-16 string to NULL-terminated UTF-32 string 201 * @param str string to encode 202 * @return pointer to allocated UTF-32 string that should be free()'d after use 203 */ 204 lsp_utf32_t *utf16le_to_utf32le(const lsp_utf16_t *str); 205 lsp_utf32_t *utf16le_to_utf32be(const lsp_utf16_t *str); 206 lsp_utf32_t *utf16be_to_utf32le(const lsp_utf16_t *str); 207 lsp_utf32_t *utf16be_to_utf32be(const lsp_utf16_t *str); 208 utf16le_to_utf32(const lsp_utf16_t * str)209 inline lsp_utf32_t *utf16le_to_utf32(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf32le, utf16le_to_utf32be)(str); } utf16be_to_utf32(const lsp_utf16_t * str)210 inline lsp_utf32_t *utf16be_to_utf32(const lsp_utf16_t *str) { return __IF_LEBE(utf16be_to_utf32le, utf16be_to_utf32be)(str); } utf16_to_utf32le(const lsp_utf16_t * str)211 inline lsp_utf32_t *utf16_to_utf32le(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf32le, utf16be_to_utf32le)(str); } utf16_to_utf32be(const lsp_utf16_t * str)212 inline lsp_utf32_t *utf16_to_utf32be(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf32be, utf16be_to_utf32be)(str); } utf16_to_utf32(const lsp_utf16_t * str)213 inline lsp_utf32_t *utf16_to_utf32(const lsp_utf16_t *str) { return __IF_LEBE(utf16le_to_utf32le, utf16be_to_utf32be)(str); } 214 215 /** 216 * Encode sequence of UTF-16 characters into sequence of UTF-32 characters 217 * @param dst target buffer to store characters 218 * @param ndst number of elements available in target buffer 219 * @param src source buffer to read characters 220 * @param nsrc number of elements available in source buffer 221 * @param force force flag that treats the input block as last in the character sequence 222 * @return number of processed code points 223 */ 224 size_t utf16le_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force); 225 size_t utf16le_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force); 226 size_t utf16be_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force); 227 size_t utf16be_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force); 228 utf16le_to_utf32(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)229 inline size_t utf16le_to_utf32(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf32le, utf16le_to_utf32be)(dst, ndst, src, nsrc, force); } utf16be_to_utf32(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)230 inline size_t utf16be_to_utf32(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16be_to_utf32le, utf16be_to_utf32be)(dst, ndst, src, nsrc, force); } utf16_to_utf32le(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)231 inline size_t utf16_to_utf32le(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf32le, utf16be_to_utf32le)(dst, ndst, src, nsrc, force); } utf16_to_utf32be(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)232 inline size_t utf16_to_utf32be(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf32be, utf16be_to_utf32be)(dst, ndst, src, nsrc, force); } utf16_to_utf32(lsp_utf32_t * dst,size_t * ndst,const lsp_utf16_t * src,size_t * nsrc,bool force)233 inline size_t utf16_to_utf32(lsp_utf32_t *dst, size_t *ndst, const lsp_utf16_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf16le_to_utf32le, utf16be_to_utf32be)(dst, ndst, src, nsrc, force); } 234 235 /** 236 * Encode NULL-terminated UTF-32 string to NULL-terminated UTF-8 string 237 * @param str string to encode 238 * @return pointer to allocated UTF-16 string that should be free()'d after use 239 */ 240 char *utf32le_to_utf8(const lsp_utf32_t *str); 241 char *utf32be_to_utf8(const lsp_utf32_t *str); utf32_to_utf8(const lsp_utf32_t * str)242 inline char *utf32_to_utf8(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf8, utf32be_to_utf8)(str); }; 243 244 /** 245 * Encode sequence of UTF-8 characters into sequence of UTF-16 characters 246 * @param dst target buffer to store characters 247 * @param ndst number of elements available in target buffer 248 * @param src source buffer to read characters 249 * @param nsrc number of elements available in source buffer 250 * @param force force flag that treats the input block as last in the character sequence 251 * @return number of processed code points 252 */ 253 size_t utf32le_to_utf8(char *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force); 254 size_t utf32be_to_utf8(char *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force); utf32_to_utf8(char * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)255 inline size_t utf32_to_utf8(char *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf8, utf32be_to_utf8)(dst, ndst, src, nsrc, force); } 256 257 /** 258 * Encode NULL-terminated UTF-32 string to NULL-terminated UTF-16 string 259 * @param str string to encode 260 * @return pointer to allocated UTF-16 string that should be free()'d after use 261 */ 262 lsp_utf16_t *utf32le_to_utf16le(const lsp_utf32_t *str); 263 lsp_utf16_t *utf32le_to_utf16be(const lsp_utf32_t *str); 264 lsp_utf16_t *utf32be_to_utf16le(const lsp_utf32_t *str); 265 lsp_utf16_t *utf32be_to_utf16be(const lsp_utf32_t *str); 266 utf32le_to_utf16(const lsp_utf32_t * str)267 inline lsp_utf16_t *utf32le_to_utf16(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf16le, utf32le_to_utf16be)(str); } utf32be_to_utf16(const lsp_utf32_t * str)268 inline lsp_utf16_t *utf32be_to_utf16(const lsp_utf32_t *str) { return __IF_LEBE(utf32be_to_utf16le, utf32be_to_utf16be)(str); } utf32_to_utf16le(const lsp_utf32_t * str)269 inline lsp_utf16_t *utf32_to_utf16le(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf16le, utf32be_to_utf16le)(str); } utf32_to_utf16be(const lsp_utf32_t * str)270 inline lsp_utf16_t *utf32_to_utf16be(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf16be, utf32be_to_utf16be)(str); } utf32_to_utf16(const lsp_utf32_t * str)271 inline lsp_utf16_t *utf32_to_utf16(const lsp_utf32_t *str) { return __IF_LEBE(utf32le_to_utf16le, utf32be_to_utf16be)(str); } 272 273 /** 274 * Encode sequence of UTF-8 characters into sequence of UTF-16 characters 275 * @param dst target buffer to store characters 276 * @param ndst number of elements available in target buffer 277 * @param src source buffer to read characters 278 * @param nsrc number of elements available in source buffer 279 * @param force force flag that treats the input block as last in the character sequence 280 * @return number of processed code points 281 */ 282 size_t utf32le_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force); 283 size_t utf32le_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force); 284 size_t utf32be_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force); 285 size_t utf32be_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force); 286 utf32_to_utf16le(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)287 inline size_t utf32_to_utf16le(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf16le, utf32be_to_utf16le)(dst, ndst, src, nsrc, force); } utf32_to_utf16be(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)288 inline size_t utf32_to_utf16be(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf16be, utf32be_to_utf16be)(dst, ndst, src, nsrc, force); } utf32le_to_utf16(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)289 inline size_t utf32le_to_utf16(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf16le, utf32le_to_utf16be)(dst, ndst, src, nsrc, force); } utf32be_to_utf16(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)290 inline size_t utf32be_to_utf16(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32be_to_utf16le, utf32be_to_utf16be)(dst, ndst, src, nsrc, force); } utf32_to_utf16(lsp_utf16_t * dst,size_t * ndst,const lsp_utf32_t * src,size_t * nsrc,bool force)291 inline size_t utf32_to_utf16(lsp_utf16_t *dst, size_t *ndst, const lsp_utf32_t *src, size_t *nsrc, bool force) { return __IF_LEBE(utf32le_to_utf16le, utf32be_to_utf16be)(dst, ndst, src, nsrc, force); } 292 293 } 294 295 #endif /* CORE_IO_CHARSET_H_ */ 296