1 #ifndef INCLUDED_UTF8_H
2 #define INCLUDED_UTF8_H
3 /* vim: set ts=8 sts=4 sw=4 tw=80 noet: */
4 /*======================================================================
5 Copyright (C) 2004,2005,2009 Walter Doekes <walter+tthsum@wjd.nu>
6 This file is part of tthsum.
7 
8 tthsum is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12 
13 tthsum is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with tthsum.  If not, see <http://www.gnu.org/licenses/>.
20 ======================================================================*/
21 
22 /**
23  * Conversion routines to and from UTF8 and UCS (UNICODE).
24  *
25  * The ANSI-C mbstowcs and wcstombs functions aren't sufficient to do
26  * UTF8 and UCS conversion on machines that do not have the proper
27  * locales set up.
28  * The two files (this header and utf8.c) provide functions similar in
29  * behaviour to mbstowcs and wcstombs, but which function without a
30  * proper locale set up.
31  *
32  * See also: mbstowcs(3), wcstombs(3), unicode(7), utf8(7).
33  *
34  * Last modified: 2009-05-02
35  */
36 
37 #include <stddef.h>
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
43 /**
44  * Convert a UTF8 multi-byte string to a UCS wide-character string.
45  * The conversion stops at the first 0 (null) character encountered.
46  *
47  * - If dest is NULL, utf8towcs returns the size required for the full
48  *   string (without the terminating 0 (null) character), n is ignored.
49  * - If dest is not NULL, utf8towcs tries to write at most n characters
50  *   to dest. The number of characters written (without the terminating
51  *   0 (null)) is returned.
52  * - If src is NULL, behaviour is undefined.
53  * - If src contains an invalid UTF8 sequence, utf8towcs returns
54  *   (size_t)-1. If there is room, a terminating 0 (null) character
55  *   will be placed after the last successfully converted character in
56  *   dest.
57  */
58 size_t utf8towcs(wchar_t* dest, const char* src, size_t n);
59 
60 /**
61  * Allocate a wide character string, passed back through dest, and
62  * perform utf8towcs on it. Free the allocated memory with free().
63  */
64 size_t utf8toawcs(wchar_t** dest, const char* src);
65 
66 /**
67  * Convert a UCS wide-character string to a UTF8 multi-byte string.
68  * The conversion stops at the first 0 (null) character encountered.
69  *
70  * - If dest is NULL, wcstoutf8 returns the size required for the full
71  *   string (without the terminating 0 (null) character), n is ignored.
72  * - If dest is not NULL, wcstoutf8 tries to write at most n characters
73  *   to dest. The number of characters written (without the terminating
74  *   0 (null)) is returned.
75  * - If src is NULL, behaviour is undefined.
76  * - If src contains an invalid UCS sequence, wcstoutf8 returns
77  *   (size_t)-1. If there is room, a terminating 0 (null) character
78  *   will be placed after the last successfully converted character in
79  *   dest.
80  *   The Unicode standard specifies no characters above 0x10ffff, so a
81  *   UTF8 encoding will at most contain 4 instead of 6 bytes (chars) per
82  *   wide character.
83  */
84 size_t wcstoutf8(char* dest, const wchar_t* src, size_t n);
85 
86 /**
87  * Allocate a multi-byte (UTF8) string, passed back through dest, and
88  * perform wcstoutf8 on it. Free the allocated memory with free().
89  */
90 size_t wcstoautf8(char** dest, const wchar_t* src);
91 
92 #ifdef __cplusplus
93 } /* extern "C" */
94 #endif /* __cplusplus */
95 
96 #endif /* INCLUDED_UTF8_H */
97