1 /* 2 * Copyright (C) the libgit2 contributors. All rights reserved. 3 * 4 * This file is part of libgit2, distributed under the GNU GPL v2 with 5 * a Linking Exception. For full terms see the included COPYING file. 6 */ 7 #ifndef INCLUDE_utf8_h__ 8 #define INCLUDE_utf8_h__ 9 10 #include "common.h" 11 12 /* 13 * Iterate through an UTF-8 string, yielding one codepoint at a time. 14 * 15 * @param out pointer where to store the current codepoint 16 * @param str current position in the string 17 * @param str_len size left in the string 18 * @return length in bytes of the read codepoint; -1 if the codepoint was invalid 19 */ 20 extern int git_utf8_iterate(uint32_t *out, const char *str, size_t str_len); 21 22 /** 23 * Returns the number of characters in the given string. 24 * 25 * This function will count invalid codepoints; if any given byte is 26 * not part of a valid UTF-8 codepoint, then it will be counted toward 27 * the length in characters. 28 * 29 * In other words: 30 * 0x24 (U+0024 "$") has length 1 31 * 0xc2 0xa2 (U+00A2 "¢") has length 1 32 * 0x24 0xc2 0xa2 (U+0024 U+00A2 "$¢") has length 2 33 * 0xf0 0x90 0x8d 0x88 (U+10348 "") has length 1 34 * 0x24 0xc0 0xc1 0x34 (U+0024 <invalid> <invalid> "4) has length 4 35 * 36 * @param str string to scan 37 * @param str_len size of the string 38 * @return length in characters of the string 39 */ 40 extern size_t git_utf8_char_length(const char *str, size_t str_len); 41 42 /** 43 * Iterate through an UTF-8 string and stops after finding any invalid UTF-8 44 * codepoints. 45 * 46 * @param str string to scan 47 * @param str_len size of the string 48 * @return length in bytes of the string that contains valid data 49 */ 50 extern size_t git_utf8_valid_buf_length(const char *str, size_t str_len); 51 52 #endif 53