1 /*
2   This file is part of Deadbeef Player source code
3   http://deadbeef.sourceforge.net
4 
5   utf8 string manipulation
6 
7   Copyright (C) 2009-2013 Alexey Yakovenko
8 
9   This software is provided 'as-is', without any express or implied
10   warranty.  In no event will the authors be held liable for any damages
11   arising from the use of this software.
12 
13   Permission is granted to anyone to use this software for any purpose,
14   including commercial applications, and to alter it and redistribute it
15   freely, subject to the following restrictions:
16 
17   1. The origin of this software must not be misrepresented; you must not
18      claim that you wrote the original software. If you use this software
19      in a product, an acknowledgment in the product documentation would be
20      appreciated but is not required.
21   2. Altered source versions must be plainly marked as such, and must not be
22      misrepresented as being the original software.
23   3. This notice may not be removed or altered from any source distribution.
24 
25   Alexey Yakovenko waker@users.sourceforge.net
26 */
27 
28 /*
29     based on Basic UTF-8 manipulation routines
30     by Jeff Bezanson
31     placed in the public domain Fall 2005
32 */
33 
34 #ifndef __UTF8_H
35 #define __UTF8_H
36 
37 #include <stdint.h>
38 #include <stdarg.h>
#include <stddef.h> /* size_t, used by u8_memchr */
39 
40 /* is c the start of a utf8 sequence?
   evaluates to 1 when the top two bits of c are anything other than binary 10
   (i.e. c is not a UTF-8 continuation byte), 0 otherwise.
   c is expanded exactly once and is fully parenthesized. */
41 #define isutf(c) (((c)&0xC0)!=0x80)
42 
43 /* convert UTF-8 data (src) to wide characters stored as uint32_t (dest)
   NOTE(review): sz and srcsz presumably give the capacity of dest and the
   length of src; their units (elements vs. bytes), any special values, and the
   meaning of the int return value are not visible in this header -- confirm
   against the implementation. */
44 int u8_toucs(uint32_t *dest, int32_t sz, const char *src, int32_t srcsz);
45 
46 /* the opposite conversion: wide characters stored as uint32_t (src) to
   UTF-8 bytes (dest)
   NOTE(review): sz and srcsz presumably give the capacity of dest and the
   length of src; units and return value are not visible here -- confirm. */
47 int u8_toutf8(char *dest, int32_t sz, uint32_t *src, int32_t srcsz);
48 
49 /* single character (ch) to UTF-8, written to dest
   NOTE(review): no buffer size is passed, so dest presumably must have room
   for the longest encoding the implementation can emit; the return value is
   presumably the number of bytes written -- confirm. */
50 int u8_wc_toutf8(char *dest, uint32_t ch);
51 
52 /* character number to byte offset: maps character index charnum within str
   to the corresponding byte offset
   NOTE(review): behaviour when charnum is past the end of str is not visible
   here -- confirm. */
53 int u8_offset(char *str, int32_t charnum);
54 
55 /* byte offset to character number: maps byte offset `offset` within s to the
   corresponding character index
   NOTE(review): behaviour when offset falls inside a multi-byte sequence or
   past the end of s is not visible here -- confirm. */
56 int u8_charnum(char *s, int32_t offset);
57 
58 /* return next character, updating an index variable (*i)
   s is not modified (const).
   NOTE(review): *i is presumably a byte offset into s that is advanced past
   the returned character -- confirm. */
59 uint32_t u8_nextchar(const char *s, int32_t *i);
60 
61 /* copies num_chars characters from src to dest, return bytes written
   note that the limit is in characters while the result is in bytes.
   NOTE(review): whether dest is NUL-terminated, and whether a terminator is
   included in the returned count, is not visible here -- confirm. */
62 int u8_strncpy (char *dest, const char* src, int num_chars);
63 
64 /* copy at most num_bytes bytes from src to dest, but always stop at the last possible utf8 character boundary;
65  return number of bytes copied
   NOTE(review): whether dest is NUL-terminated is not visible in this header -- confirm
66  */
67 int u8_strnbcpy (char *dest, const char* src, int num_bytes);
68 
69 /* copy a single utf8 character from src to dest, but only if it fits within num_bytes bytes;
70   return number of bytes copied
   NOTE(review): presumably returns 0 when the character does not fit -- confirm
71  */
72 int u8_charcpy (char *dest, const char *src, int num_bytes);
73 
74 /* move to next character: updates *i in place; s is not modified (const)
   NOTE(review): *i is presumably a byte offset into s -- confirm. */
75 void u8_inc(const char *s, int32_t *i);
76 
77 /* move to previous character: updates *i in place; s is not modified (const)
   NOTE(review): *i is presumably a byte offset into s; behaviour when *i is
   already at the start of s is not visible here -- confirm. */
78 void u8_dec(const char *s, int32_t *i);
79 
80 /* assuming src points to the character after a backslash, read an
81    escape sequence, storing the result in dest and returning the number of
82    input characters processed
   NOTE(review): the set of recognised escape forms is defined by the
   implementation and is not visible in this header. */
83 int u8_read_escape_sequence(const char *src, uint32_t *dest);
84 
85 /* given a wide character (ch), convert it to an ASCII escape sequence stored in
86    buf, where buf is "sz" bytes. returns the number of characters output.
   NOTE(review): behaviour when the sequence does not fit in sz bytes is not
   visible here -- confirm. */
87 int u8_escape_wchar(char *buf, int32_t sz, uint32_t ch);
88 
89 /* convert a string "src" containing escape sequences to UTF-8, written to buf
   NOTE(review): sz is presumably the size of buf in bytes and the return value
   presumably a count of bytes produced -- confirm against the implementation. */
90 int u8_unescape(char *buf, int32_t sz, const char *src);
91 
92 /* convert UTF-8 "src" to ASCII with escape sequences, written to buf.
93    if escape_quotes is nonzero, quote characters will be preceded by
94    backslashes as well.
   NOTE(review): sz is presumably the size of buf in bytes; the meaning of the
   return value is not visible here -- confirm. */
95 int u8_escape(char *buf, int32_t sz, const char *src, int32_t escape_quotes);
96 
97 /* utility predicates used by the above
   NOTE(review): presumably return nonzero when c is an octal (octal_digit) or
   hexadecimal (hex_digit) digit and 0 otherwise -- confirm. these names are
   not u8_-prefixed, so they share the global namespace with client code. */
98 int octal_digit(char c);
99 int hex_digit(char c);
100 
101 /* return a pointer to the first occurrence of ch in s, or NULL if not
102    found. character index of found character returned in *charn.
   ch is a uint32_t rather than a char.
   NOTE(review): ch is presumably a decoded code point, not a single byte;
   the value left in *charn when ch is not found is not visible here -- confirm. */
103 char *u8_strchr(char *s, uint32_t ch, int32_t *charn);
104 
105 /* same as u8_strchr, but searches a buffer of a given size (sz) instead of
106    a NUL-terminated string.
   NOTE(review): sz is presumably a byte count -- confirm. */
107 char *u8_memchr(char *s, uint32_t ch, size_t sz, int32_t *charn);
108 
109 /* count the number of characters in a UTF-8 string
   the result is a character count, as opposed to the byte count strlen gives.
   NOTE(review): handling of malformed sequences is not visible here. */
110 int u8_strlen(char *s);
111 
/* NOTE(review): undocumented in the original header. presumably returns
   nonzero when the locale name passed in `locale` designates a UTF-8 locale
   and 0 otherwise -- confirm against the implementation. */
112 int u8_is_locale_utf8(char *locale);
113 
114 /* printf where the format string and arguments may be in UTF-8.
115    you can avoid this function and just use ordinary printf() if the current
116    locale is UTF-8.
   u8_vprintf takes an already-started va_list; u8_printf is the variadic form.
   NOTE(review): the return value presumably follows printf conventions -- confirm. */
117 int u8_vprintf(char *fmt, va_list ap);
118 int u8_printf(char *fmt, ...);
119 
120 // validate utf8 string
121 // returns 1 if valid, 0 otherwise
// NOTE(review): max_len presumably limits how many bytes of str are examined,
// and *end presumably receives a position inside str (e.g. where validation
// stopped); whether special values of max_len or a NULL end are accepted is
// not visible in this header -- confirm against the implementation
122 int u8_valid (const char  *str,
123         int max_len,
124         const char **end);
125 
// NOTE(review): undocumented in the original header. presumably lowercases
// the utf8 character(s) at c, l bytes long, into out and returns the number
// of bytes written -- confirm. note that c is `const signed char *` while
// out is plain `char *`.
126 int
127 u8_tolower (const signed char *c, int l, char *out);
128 
// NOTE(review): undocumented in the original header. presumably uppercases
// the utf8 character(s) at c, l bytes long, into out and returns the number
// of bytes written -- confirm. note that c is `const signed char *` while
// out is plain `char *`.
129 int
130 u8_toupper (const signed char *c, int l, char *out);
131 
// NOTE(review): undocumented in the original header. presumably a
// case-insensitive comparison of the utf8 strings a and b with a
// strcasecmp-style result (0 when equal) -- confirm.
132 int
133 u8_strcasecmp (const char *a, const char *b);
134 
// NOTE(review): undocumented in the original header. presumably a
// case-insensitive substring search returning a pointer to the first
// occurrence of s2 inside s1, or NULL when there is none -- confirm.
135 const char *
136 utfcasestr (const char *s1, const char *s2);
137 
138 // s2 must be lowercase
// NOTE(review): presumably the same search as utfcasestr, made cheaper by
// relying on the caller to pass an already-lowercased s2 -- confirm.
139 const char *
140 utfcasestr_fast (const char *s1, const char *s2);
141 
142 #endif
143