1 /*
2  *			GPAC - Multimedia Framework C SDK
3  *
4  *			Authors: Jean Le Feuvre
5  *			Copyright (c) Telecom ParisTech 2000-2019
6  *					All rights reserved
7  *
8  *  This file is part of GPAC / common tools sub-project
9  *
10  *  GPAC is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU Lesser General Public License as published by
12  *  the Free Software Foundation; either version 2, or (at your option)
13  *  any later version.
14  *
15  *  GPAC is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *  GNU Lesser General Public License for more details.
19  *
20  *  You should have received a copy of the GNU Lesser General Public
21  *  License along with this library; see the file COPYING.  If not, write to
22  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25 
26 #ifndef _GF_UTF_H_
27 #define _GF_UTF_H_
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 /*!
34 \file <gpac/utf.h>
35 \brief UTF functions.
36  */
37 
38 /*!
39 \addtogroup utf_grp
40 \brief UTF and Unicode-related functions
41 
42 This section documents the UTF functions of the GPAC framework.\n
43 The wide characters in GPAC are unsigned shorts, in other words GPAC only supports UTF8 and UTF16 coding styles.
44 
45 \note these functions are just ports of libutf8 library tools into GPAC.
46 
47 @{
48  */
49 
50 #include <gpac/tools.h>
51 
52 /*!
53 \brief wide-char to multibyte conversion
54 
55 Converts a wide-char string (array of unsigned short) to a multibyte string.
56 \param dst multibyte destination buffer
57 \param dst_len multibyte destination buffer size
58 \param srcp address of the pointer to the wide-char string. The pointer will be set to the next char to be converted in the input buffer if there is not enough space in the destination, or to NULL if conversion was completed.
59 \return length (in bytes) of the multibyte string, or -1 if error (i.e. (size_t)-1, since the return type is the unsigned type size_t).
60  */
61 size_t gf_utf8_wcstombs(char* dst, size_t dst_len, const unsigned short** srcp);
62 
63 /*!
64 \brief multibyte to wide-char conversion
65 
66 Converts a multibyte string to a wide-char string (array of unsigned short).
67 \param dst wide-char destination buffer
68 \param dst_len wide-char destination buffer size
69 \param srcp address of the pointer to the multibyte character buffer. The pointer will be set to the next char to be converted in the input buffer if there is not enough space in the destination, or to NULL if conversion was completed.
70 \return length (in unsigned short units) of the wide-char string, or -1 if error (i.e. (size_t)-1, since the return type is the unsigned type size_t).
71  */
72 size_t gf_utf8_mbstowcs(unsigned short* dst, size_t dst_len, const char** srcp);
73 
74 /*!
75 \brief wide-char string length
76 
77 Gets the length, in characters, of a wide-char string.
78 \param s the wide-char string
79 \return the wide-char string length
80  */
81 size_t gf_utf8_wcslen(const unsigned short *s);
82 
83 /*!
84 \brief returns a UTF8 string from a string started with BOM
85 
86 Gets the UTF8 string corresponding to the input data, which is either a string or a wide-char string.
87 \param data the string or wide-char string
88 \param size size of the data buffer
89 \param out_ptr set to an allocated buffer if needed for conversion,
90   shall be destroyed by caller
91 \return the UTF8 string corresponding to the input data
92  */
93 char *gf_utf_get_utf8_string_from_bom(u8 *data, u32 size, char **out_ptr);
94 
95 /*!
96 \brief string bidi reordering
97 
98 Performs a simple reordering of the words in the string, based on the direction of each word, so that glyphs are sorted in display order.
99 \param utf_string the wide-char string
100 \param len the length of the wide-char string
101 \return 1 if the main direction is right-to-left, 0 otherwise
102  */
103 Bool gf_utf8_reorder_bidi(u16 *utf_string, u32 len);
104 
105 /*! maximum size in bytes of a single UTF-8 character; declared static, so each file including this header gets its own copy */
106 static const size_t UTF8_MAX_BYTES_PER_CHAR = 4;
107 
108 
109 /*!
110 \brief Unicode conversion from UTF-8 to UCS-4
111 \param ucs4_buf The UCS-4 buffer to fill; shall be allocated by the caller
112 \param utf8_len The length of the UTF-8 buffer
113 \param utf8_buf The buffer containing the UTF-8 data
114 \return the length of the ucs4_buf. \note ucs4_buf should be at least utf8_len * 4 (unit not stated here, presumably bytes - TODO confirm against the implementation)
115  */
116 u32 utf8_to_ucs4 (u32 *ucs4_buf, u32 utf8_len, unsigned char *utf8_buf);
117 
118 
119 
120 
121 #if defined(WIN32)
122 /* Helpers only declared when WIN32 is defined. Presumably gf_utf8_to_wcs converts a UTF-8 string to a wchar_t string and gf_wcs_to_utf8 does the reverse. NOTE(review): exact semantics and ownership of the returned buffers are not visible in this header - confirm against the implementation. */
123 wchar_t* gf_utf8_to_wcs(const char* str);
124 char* gf_wcs_to_utf8(const wchar_t* str);
125 
126 #endif
127 
128 /*! @} */
129 
130 #ifdef __cplusplus
131 }
132 #endif
133 
134 
135 #endif		/*_GF_UTF_H_*/
136 
137