#ifndef INCLUDED_UTF8_H
#define INCLUDED_UTF8_H
/* vim: set ts=8 sts=4 sw=4 tw=80 noet: */
/*======================================================================
Copyright (C) 2004,2005,2009 Walter Doekes <walter+tthsum@wjd.nu>
This file is part of tthsum.

tthsum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

tthsum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with tthsum.  If not, see <http://www.gnu.org/licenses/>.
======================================================================*/

/**
 * Conversion routines to and from UTF8 and UCS (UNICODE).
 *
 * The ANSI-C mbstowcs and wcstombs functions aren't sufficient to do
 * UTF8 and UCS conversion on machines that do not have the proper
 * locales set up.
 * The functions declared herein (this file and utf8.c) behave similarly
 * to mbstowcs and wcstombs, but work without a proper locale set up.
 *
 * See also: mbstowcs(3), wcstombs(3), unicode(7), utf8(7).
 *
 * Last modified: 2009-05-02
 */

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Convert a UTF8 multi-byte string to a UCS wide-character string.
 * The conversion stops at the first 0 (null) character encountered.
 *
 * - If dest is NULL, utf8towcs returns the size required for the full
 *   string (without the terminating 0 (null) character), n is ignored.
 * - If dest is not NULL, utf8towcs tries to write at most n characters
 *   to dest. The number of characters written (without the terminating
 *   0 (null)) is returned.
 * - If src is NULL, behaviour is undefined.
 * - If src contains an invalid UTF8 sequence, utf8towcs returns
 *   (size_t)-1. If there is room, a terminating 0 (null) character
 *   will be placed after the last successfully converted character in
 *   dest.
 */
size_t utf8towcs(wchar_t* dest, const char* src, size_t n);

/**
 * Allocate a wide-character string and perform utf8towcs on it.
 * The allocated string is handed back through *dest; free it with
 * free().
 *
 * NOTE(review): the return value is not documented here; presumably it
 * mirrors utf8towcs (length on success, (size_t)-1 on failure) --
 * confirm against utf8.c.
 */
size_t utf8toawcs(wchar_t** dest, const char* src);

/**
 * Convert a UCS wide-character string to a UTF8 multi-byte string.
 * The conversion stops at the first 0 (null) character encountered.
 *
 * - If dest is NULL, wcstoutf8 returns the size required for the full
 *   string (without the terminating 0 (null) character), n is ignored.
 * - If dest is not NULL, wcstoutf8 tries to write at most n characters
 *   to dest. The number of characters written (without the terminating
 *   0 (null)) is returned.
 * - If src is NULL, behaviour is undefined.
 * - If src contains an invalid UCS sequence, wcstoutf8 returns
 *   (size_t)-1. If there is room, a terminating 0 (null) character
 *   will be placed after the last successfully converted character in
 *   dest.
 *
 * The Unicode standard specifies no characters above 0x10ffff, so a
 * UTF8 encoding will contain at most 4 instead of 6 characters (bytes)
 * per wide character.
 */
size_t wcstoutf8(char* dest, const wchar_t* src, size_t n);

/**
 * Allocate a multi-byte (UTF8) string and perform wcstoutf8 on it.
 * The allocated string is handed back through *dest; free it with
 * free().
 *
 * NOTE(review): the return value is not documented here; presumably it
 * mirrors wcstoutf8 (length on success, (size_t)-1 on failure) --
 * confirm against utf8.c.
 */
size_t wcstoautf8(char** dest, const wchar_t* src);

#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

#endif /* INCLUDED_UTF8_H */