/*
    This file is part of Deadbeef Player source code
    http://deadbeef.sourceforge.net

    utf8 string manipulation

    Copyright (C) 2009-2013 Alexey Yakovenko

    This software is provided 'as-is', without any express or implied
    warranty.  In no event will the authors be held liable for any damages
    arising from the use of this software.

    Permission is granted to anyone to use this software for any purpose,
    including commercial applications, and to alter it and redistribute it
    freely, subject to the following restrictions:

    1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
    2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
    3. This notice may not be removed or altered from any source distribution.

    Alexey Yakovenko waker@users.sourceforge.net
*/

/*
    based on Basic UTF-8 manipulation routines
    by Jeff Bezanson
    placed in the public domain Fall 2005
*/

#ifndef __UTF8_H
#define __UTF8_H

#include <stddef.h> /* size_t, used by u8_memchr */
#include <stdint.h>
#include <stdarg.h>

/* is c the start of a utf8 sequence?
   (true for every byte that is not a 10xxxxxx continuation byte) */
#define isutf(c) (((c)&0xC0)!=0x80)

/* convert UTF-8 data to wide character */
int u8_toucs(uint32_t *dest, int32_t sz, const char *src, int32_t srcsz);

/* the opposite conversion */
int u8_toutf8(char *dest, int32_t sz, uint32_t *src, int32_t srcsz);

/* single character to UTF-8 */
int u8_wc_toutf8(char *dest, uint32_t ch);

/* character number to byte offset */
int u8_offset(char *str, int32_t charnum);

/* byte offset to character number */
int u8_charnum(char *s, int32_t offset);

/* return next character, updating an index variable */
uint32_t u8_nextchar(const char *s, int32_t *i);

/* copies num_chars characters from src to dest, return bytes written */
int u8_strncpy (char *dest, const char* src, int num_chars);

/* copy at most num_bytes bytes from src to dest, but always stop at the
   last possible utf8 character boundary;
   return number of bytes copied
*/
int u8_strnbcpy (char *dest, const char* src, int num_bytes);

/* copy a single utf8 character from src to dest, only if it fits into
   num_bytes bytes;
   return number of bytes copied
*/
int u8_charcpy (char *dest, const char *src, int num_bytes);

/* move to next character */
void u8_inc(const char *s, int32_t *i);

/* move to previous character */
void u8_dec(const char *s, int32_t *i);

/* assuming src points to the character after a backslash, read an
   escape sequence, storing the result in dest and returning the number of
   input characters processed */
int u8_read_escape_sequence(const char *src, uint32_t *dest);

/* given a wide character, convert it to an ASCII escape sequence stored in
   buf, where buf is "sz" bytes. returns the number of characters output. */
int u8_escape_wchar(char *buf, int32_t sz, uint32_t ch);

/* convert a string "src" containing escape sequences to UTF-8 */
int u8_unescape(char *buf, int32_t sz, const char *src);

/* convert UTF-8 "src" to ASCII with escape sequences.
   if escape_quotes is nonzero, quote characters will be preceded by
   backslashes as well. */
int u8_escape(char *buf, int32_t sz, const char *src, int32_t escape_quotes);

/* utility predicates used by the above */
int octal_digit(char c);
int hex_digit(char c);

/* return a pointer to the first occurrence of ch in s, or NULL if not
   found. character index of found character returned in *charn. */
char *u8_strchr(char *s, uint32_t ch, int32_t *charn);

/* same as the above, but searches a buffer of a given size instead of
   a NUL-terminated string. */
char *u8_memchr(char *s, uint32_t ch, size_t sz, int32_t *charn);

/* count the number of characters in a UTF-8 string */
int u8_strlen(char *s);

/* NOTE(review): undocumented in this header; presumably reports whether the
   given locale name denotes a UTF-8 locale -- confirm against the
   implementation. */
int u8_is_locale_utf8(char *locale);

/* printf where the format string and arguments may be in UTF-8.
   you can avoid this function and just use ordinary printf() if the current
   locale is UTF-8. */
int u8_vprintf(char *fmt, va_list ap);
int u8_printf(char *fmt, ...);

// validate utf8 string
// returns 1 if valid, 0 otherwise
// NOTE(review): max_len and end are not described here; presumably max_len
// bounds the number of bytes examined and *end receives the position where
// validation stopped -- confirm against the implementation.
int u8_valid (const char *str,
        int max_len,
        const char **end);

// NOTE(review): undocumented here; presumably lowercases the UTF-8 character
// of l bytes at c into out and returns the number of bytes written -- confirm
// against the implementation.
int
u8_tolower (const signed char *c, int l, char *out);

// NOTE(review): undocumented here; presumably the uppercase counterpart of
// u8_tolower -- confirm against the implementation.
int
u8_toupper (const signed char *c, int l, char *out);

// NOTE(review): undocumented here; presumably a case-insensitive comparison
// of two UTF-8 strings with strcasecmp-style result -- confirm against the
// implementation.
int
u8_strcasecmp (const char *a, const char *b);

// NOTE(review): undocumented here; presumably a case-insensitive search for
// s2 inside s1 -- confirm against the implementation.
const char *
utfcasestr (const char *s1, const char *s2);

// s2 must be lowercase
const char *
utfcasestr_fast (const char *s1, const char *s2);

#endif