1 /* $NetBSD: utf8.h,v 1.4 2014/12/10 04:37:55 christos Exp $ */ 2 3 /* Id: utf8.h,v 1.1 2003/06/04 00:25:44 marka Exp */ 4 /* 5 * Copyright (c) 2000 Japan Network Information Center. All rights reserved. 6 * 7 * By using this file, you agree to the terms and conditions set forth bellow. 8 * 9 * LICENSE TERMS AND CONDITIONS 10 * 11 * The following License Terms and Conditions apply, unless a different 12 * license is obtained from Japan Network Information Center ("JPNIC"), 13 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, 14 * Chiyoda-ku, Tokyo 101-0047, Japan. 15 * 16 * 1. Use, Modification and Redistribution (including distribution of any 17 * modified or derived work) in source and/or binary forms is permitted 18 * under this License Terms and Conditions. 19 * 20 * 2. Redistribution of source code must retain the copyright notices as they 21 * appear in each source code file, this License Terms and Conditions. 22 * 23 * 3. Redistribution in binary form must reproduce the Copyright Notice, 24 * this License Terms and Conditions, in the documentation and/or other 25 * materials provided with the distribution. For the purposes of binary 26 * distribution the "Copyright Notice" refers to the following language: 27 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." 28 * 29 * 4. The name of JPNIC may not be used to endorse or promote products 30 * derived from this Software without specific prior written approval of 31 * JPNIC. 32 * 33 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC 34 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 35 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 36 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE 37 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 38 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 39 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 40 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 41 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 42 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 43 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 44 */ 45 46 #ifndef IDN_UTF8_H 47 #define IDN_UTF8_H 1 48 49 #ifdef __cplusplus 50 extern "C" { 51 #endif 52 53 /* 54 * UTF-8 encoded string facility. 55 */ 56 57 #include <idn/export.h> 58 59 /* 60 * Get the length of a character. 61 * 62 * Get the length (in bytes) of a character whose first byte is pointed 63 * by 's'. Since this function only looks one first byte to determine the 64 * length, it is possible some of the following bytes are invalid. 65 */ 66 IDN_EXPORT int 67 idn_utf8_mblen(const char *s); 68 69 /* 70 * Get one character. 71 * 72 * Get the first character of the string pointed by 's', and copy it 73 * to 'buf', whose length is 'len'. Returns the number of bytes copied, 74 * or zero if the encoding is invalid or len is too small. 75 * 76 * The area pointed by 'buf' must be large enough to store any UTF-8 encoded 77 * character. 78 * 79 * Note that the copied string is not NUL-terminated. 80 */ 81 IDN_EXPORT int 82 idn_utf8_getmb(const char *s, size_t len, char *buf); 83 84 /* 85 * Get one character in UCS-4. 86 * 87 * Similar to 'idn_utf8_getmb', except that the result is not in UTF-8 88 * encoding, but in UCS-4 format (plain 32bit integer value). 89 */ 90 IDN_EXPORT int 91 idn_utf8_getwc(const char *s, size_t len, unsigned long *vp); 92 93 /* 94 * Put one character. 95 * 96 * This function is an opposite of 'idn_utf8_getwc'. It takes a UCS-4 97 * value 'v', convert it to UTF-8 encoded sequence, and store it to 's', 98 * whose length is 'len'. It returns the number of bytes written, or 99 * zero 'v' is out of range or 'len' is too small. 100 */ 101 IDN_EXPORT int 102 idn_utf8_putwc(char *s, size_t len, unsigned long v); 103 104 /* 105 * Check the validity of UTF-8 encoded character. 106 * 107 * Check if the character pointed by 's' is a valid UTF-8 encoded 108 * character. Return the length of the character (in bytes) if it is valid, 109 * 0 otherwise. 110 */ 111 IDN_EXPORT int 112 idn_utf8_isvalidchar(const char *s); 113 114 /* 115 * Check the validity of UTF-8 encoded string. 116 * 117 * Check if the NUL-terminated string 's' is valid as a UTF-8 encoded 118 * string. Return 1 if it is valid, 0 otherwise. 119 */ 120 IDN_EXPORT int 121 idn_utf8_isvalidstring(const char *s); 122 123 /* 124 * Find first byte of a character. 125 * 126 * Find the first byte of a character 's' points to. 's' may point 127 * the 2nd or later byte of a character. 'known_top' is a pointer to 128 * a string which contains 's', and is known to be the first byte of 129 * a character. If it couldn't find the first byte between 'known_top' 130 * and 's', NULL will be returned. 131 */ 132 IDN_EXPORT char * 133 idn_utf8_findfirstbyte(const char *s, const char *known_top); 134 135 #ifdef __cplusplus 136 } 137 #endif 138 139 #endif /* IDN_UTF8_H */ 140