1 /*	$NetBSD: utf8.h,v 1.4 2014/12/10 04:37:55 christos Exp $	*/
2 
3 /* Id: utf8.h,v 1.1 2003/06/04 00:25:44 marka Exp  */
4 /*
5  * Copyright (c) 2000 Japan Network Information Center.  All rights reserved.
6  *
7  * By using this file, you agree to the terms and conditions set forth bellow.
8  *
9  * 			LICENSE TERMS AND CONDITIONS
10  *
11  * The following License Terms and Conditions apply, unless a different
12  * license is obtained from Japan Network Information Center ("JPNIC"),
13  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
14  * Chiyoda-ku, Tokyo 101-0047, Japan.
15  *
16  * 1. Use, Modification and Redistribution (including distribution of any
17  *    modified or derived work) in source and/or binary forms is permitted
18  *    under this License Terms and Conditions.
19  *
20  * 2. Redistribution of source code must retain the copyright notices as they
21  *    appear in each source code file, this License Terms and Conditions.
22  *
23  * 3. Redistribution in binary form must reproduce the Copyright Notice,
24  *    this License Terms and Conditions, in the documentation and/or other
25  *    materials provided with the distribution.  For the purposes of binary
26  *    distribution the "Copyright Notice" refers to the following language:
27  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
28  *
29  * 4. The name of JPNIC may not be used to endorse or promote products
30  *    derived from this Software without specific prior written approval of
31  *    JPNIC.
32  *
33  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
34  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
35  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
36  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
37  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
40  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
41  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
42  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
43  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
44  */
45 
46 #ifndef IDN_UTF8_H
47 #define IDN_UTF8_H 1
48 
49 #ifdef __cplusplus
50 extern "C" {
51 #endif
52 
53 /*
54  * UTF-8 encoded string facility.
55  */
56 
57 #include <idn/export.h>
58 
59 /*
60  * Get the length of a character.
61  *
62  * Get the length (in bytes) of the character whose first byte is pointed
63  * to by 's'.  Since this function looks only at that first byte to determine
64  * the length, it is possible that some of the following bytes are invalid.
65  */
66 IDN_EXPORT int
67 idn_utf8_mblen(const char *s);
68 
69 /*
70  * Get one character.
71  *
72  * Get the first character of the string pointed to by 's', and copy it
73  * to 'buf', whose length is 'len'.  Returns the number of bytes copied,
74  * or zero if the encoding is invalid or 'len' is too small.
75  *
76  * The area pointed to by 'buf' must be large enough to store any UTF-8
77  * encoded character.  The copied string is not NUL-terminated.
78  *
79  * NOTE(review): 'len' may bound 's' rather than 'buf' -- please confirm.
80  */
81 IDN_EXPORT int
82 idn_utf8_getmb(const char *s, size_t len, char *buf);
83 
84 /*
85  * Get one character in UCS-4.
86  *
87  * Similar to 'idn_utf8_getmb', except that the result is not UTF-8
88  * encoded but a UCS-4 value (plain 32-bit integer), presumably via 'vp'.
89  */
90 IDN_EXPORT int
91 idn_utf8_getwc(const char *s, size_t len, unsigned long *vp);
92 
93 /*
94  * Put one character.
95  *
96  * This function is the opposite of 'idn_utf8_getwc'.  It takes a UCS-4
97  * value 'v', converts it to a UTF-8 encoded sequence, and stores it to 's',
98  * whose length is 'len'.  It returns the number of bytes written, or
99  * zero if 'v' is out of range or 'len' is too small.
100  */
101 IDN_EXPORT int
102 idn_utf8_putwc(char *s, size_t len, unsigned long v);
103 
104 /*
105  * Check the validity of a UTF-8 encoded character.
106  *
107  * Check if the character pointed to by 's' is a valid UTF-8 encoded
108  * character.  Return the length of the character (in bytes) if it is valid,
109  * 0 otherwise.
110  */
111 IDN_EXPORT int
112 idn_utf8_isvalidchar(const char *s);
113 
114 /*
115  * Check the validity of a UTF-8 encoded string.
116  *
117  * Check if the NUL-terminated string 's' is valid as a UTF-8 encoded
118  * string.  Return 1 if it is valid, 0 otherwise.
119  */
120 IDN_EXPORT int
121 idn_utf8_isvalidstring(const char *s);
122 
123 /*
124  * Find the first byte of a character.
125  *
126  * Find the first byte of the character 's' points to.  's' may point to
127  * the 2nd or a later byte of a character.  'known_top' is a pointer to
128  * a string which contains 's', and is known to point to the first byte of
129  * a character.  If the first byte cannot be found between 'known_top'
130  * and 's', NULL will be returned.
131  */
132 IDN_EXPORT char *
133 idn_utf8_findfirstbyte(const char *s, const char *known_top);
134 
135 #ifdef __cplusplus
136 }
137 #endif
138 
139 #endif /* IDN_UTF8_H */
140