1 /* 2 * GPAC - Multimedia Framework C SDK 3 * 4 * Authors: Jean Le Feuvre 5 * Copyright (c) Telecom ParisTech 2000-2019 6 * All rights reserved 7 * 8 * This file is part of GPAC / common tools sub-project 9 * 10 * GPAC is free software; you can redistribute it and/or modify 11 * it under the terms of the GNU Lesser General Public License as published by 12 * the Free Software Foundation; either version 2, or (at your option) 13 * any later version. 14 * 15 * GPAC is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with this library; see the file COPYING. If not, write to 22 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 23 * 24 */ 25 26 #ifndef _GF_UTF_H_ 27 #define _GF_UTF_H_ 28 29 #ifdef __cplusplus 30 extern "C" { 31 #endif 32 33 /*! 34 \file <gpac/utf.h> 35 \brief UTF functions. 36 */ 37 38 /*! 39 \addtogroup utf_grp 40 \brief UTF and Unicode-related functions 41 42 This section documents the UTF functions of the GPAC framework.\n 43 The wide characters in GPAC are unsignad shorts, in other words GPAC only supports UTF8 and UTF16 coding styles. 44 45 \note these functions are just ports of libutf8 library tools into GPAC. 46 47 @{ 48 */ 49 50 #include <gpac/tools.h> 51 52 /*! 53 \brief wide-char to multibyte conversion 54 55 Converts a wide-char string to a multibyte string 56 \param dst multibyte destination buffer 57 \param dst_len multibyte destination buffer size 58 \param srcp address of the wide-char string. This will be set to the next char to be converted in the input buffer if not enough space in the destination, or NULL if conversion was completed. 59 \return length (in byte) of the multibyte string or -1 if error. 60 */ 61 size_t gf_utf8_wcstombs(char* dst, size_t dst_len, const unsigned short** srcp); 62 63 /*! 64 \brief multibyte to wide-char conversion 65 66 Converts a multibyte string to a wide-char string 67 \param dst wide-char destination buffer 68 \param dst_len wide-char destination buffer size 69 \param srcp address of the multibyte character buffer. This will be set to the next char to be converted in the input buffer if not enough space in the destination, or NULL if conversion was completed. 70 \return length (in unsigned short) of the wide-char string or -1 if error. 71 */ 72 size_t gf_utf8_mbstowcs(unsigned short* dst, size_t dst_len, const char** srcp); 73 74 /*! 75 \brief wide-char string length 76 77 Gets the length in character of a wide-char string 78 \param s the wide-char string 79 \return the wide-char string length 80 */ 81 size_t gf_utf8_wcslen(const unsigned short *s); 82 83 /*! 84 \brief returns a UTF8 string from a string started with BOM 85 86 Returns the length in character of a wide-char string 87 \param data the string or wide-char string 88 \param size of the data buffer 89 size of the data buffer 90 \param out_ptr set to an allocated buffer if needed for conversion, shall be destroyed by caller 91 \return the UTF8 string corresponding 92 */ 93 char *gf_utf_get_utf8_string_from_bom(u8 *data, u32 size, char **out_ptr); 94 95 /*! 96 \brief string bidi reordering 97 98 Performs a simple reordering of words in the string based on each word direction, so that glyphs are sorted in display order. 99 \param utf_string the wide-char string 100 \param len the len of the wide-char string 101 \return 1 if the main direction is right-to-left, 0 otherwise 102 */ 103 Bool gf_utf8_reorder_bidi(u16 *utf_string, u32 len); 104 105 /*! maximum character size in bytes*/ 106 static const size_t UTF8_MAX_BYTES_PER_CHAR = 4; 107 108 109 /*! 110 \brief Unicode conversion from UTF-8 to UCS-4 111 \param ucs4_buf The UCS-4 buffer to fill 112 \param utf8_len The length of the UTF-8 buffer 113 \param utf8_buf The buffer containing the UTF-8 data 114 \return the length of the ucs4_buf. Note that the ucs4_buf should be allocated by parent and should be at least utf8_len * 4 115 */ 116 u32 utf8_to_ucs4 (u32 *ucs4_buf, u32 utf8_len, unsigned char *utf8_buf); 117 118 119 120 121 #if defined(WIN32) 122 123 wchar_t* gf_utf8_to_wcs(const char* str); 124 char* gf_wcs_to_utf8(const wchar_t* str); 125 126 #endif 127 128 /*! @} */ 129 130 #ifdef __cplusplus 131 } 132 #endif 133 134 135 #endif /*_GF_UTF_H_*/ 136 137