/*
 * Public interface for a small set of UTF-8 string helpers: conversion
 * between UTF-8 and 32-bit code points, character/byte index mapping,
 * escape-sequence handling, searching, and UTF-8 aware printf.
 *
 * The include guard makes the header safe to include more than once.
 */
#ifndef UTF8_H
#define UTF8_H 1

#include <stdarg.h>
#include <stdlib.h>
#include <stdint.h>

/*
 * Is c the start of a UTF-8 sequence?
 * Evaluates to nonzero unless the top two bits of c are "10", i.e. unless
 * c has the bit pattern of a UTF-8 continuation byte.
 */
#define isutf(c) (((c)&0xC0)!=0x80)

/*
 * Convert UTF-8 data in src to wide (32-bit) characters in dest.
 * NOTE(review): sz and srcsz look like the capacity of dest and the size of
 * src respectively; their units and the meaning of the return value are
 * defined by the implementation -- confirm there.
 */
int u8_toucs(uint32_t *dest, int sz, char *src, int srcsz);

/*
 * The opposite conversion: wide (32-bit) characters in src to UTF-8 in dest.
 * NOTE(review): same caveat as u8_toucs regarding sz, srcsz and the return
 * value.
 */
int u8_toutf8(char *dest, int sz, uint32_t *src, int srcsz);

/*
 * Convert the single character ch to UTF-8, writing the result to dest.
 * NOTE(review): presumably returns the number of bytes written -- confirm.
 */
int u8_wc_toutf8(char *dest, uint32_t ch);

/* Map a character number within str to the corresponding byte offset. */
int u8_offset(char *str, int charnum);

/* Map a byte offset within s to the corresponding character number. */
int u8_charnum(char *s, int offset);

/* Return the next character of s, updating the index variable *i. */
uint32_t u8_nextchar(char *s, int *i);

/* Move the index *i to the next character of s. */
void u8_inc(char *s, int *i);

/* Move the index *i to the previous character of s. */
void u8_dec(char *s, int *i);

/* Return the length of the next UTF-8 sequence in s. */
int u8_seqlen(char *s);

/*
 * Assuming src points to the character after a backslash, read an escape
 * sequence, storing the result in *dest and returning the number of input
 * characters processed.
 */
int u8_read_escape_sequence(char *src, uint32_t *dest);

/*
 * Given a wide character ch, convert it to an ASCII escape sequence stored
 * in buf, where buf is sz bytes. Returns the number of characters output.
 */
int u8_escape_wchar(char *buf, int sz, uint32_t ch);

/*
 * Convert the string src, which contains escape sequences, to UTF-8 in buf.
 * NOTE(review): sz is presumably the size of buf in bytes -- confirm.
 */
int u8_unescape(char *buf, int sz, char *src);

/*
 * Convert UTF-8 src to ASCII with escape sequences, written to buf.
 * If escape_quotes is nonzero, quote characters will be preceded by
 * backslashes as well.
 */
int u8_escape(char *buf, int sz, char *src, int escape_quotes);

/*
 * Utility predicates used by the functions above.
 * NOTE(review): judging by their names they test whether c is an octal or
 * hexadecimal digit -- confirm against the implementation.
 */
int octal_digit(char c);
int hex_digit(char c);

/*
 * Return a pointer to the first occurrence of ch in s, or NULL if not
 * found. The character index of the found character is returned in *charn.
 */
char *u8_strchr(char *s, uint32_t ch, int *charn);

/*
 * Same as u8_strchr, but searches a buffer of the given size sz instead of
 * a NUL-terminated string.
 */
char *u8_memchr(char *s, uint32_t ch, size_t sz, int *charn);

/* Count the number of characters in the UTF-8 string s. */
int u8_strlen(char *s);

/*
 * NOTE(review): undocumented in the original; by its name this reports
 * whether the given locale string denotes a UTF-8 locale -- confirm.
 */
int u8_is_locale_utf8(char *locale);

/*
 * printf where the format string and arguments may be in UTF-8.
 * You can avoid these functions and just use ordinary printf() if the
 * current locale is UTF-8.
 */
int u8_vprintf(char *fmt, va_list ap);
int u8_printf(char *fmt, ...);

#endif /* UTF8_H */