1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 #ifndef INCLUDE_utf8_h__
8 #define INCLUDE_utf8_h__
9 
10 #include "common.h"
11 
12 /**
13  * Iterate through a UTF-8 string, yielding one codepoint at a time.
14  *
15  * @param out pointer where to store the current codepoint
16  * @param str current position in the string
17  * @param str_len size left in the string
18  * @return length in bytes of the read codepoint; -1 if the codepoint was invalid
19  */
20 extern int git_utf8_iterate(uint32_t *out, const char *str, size_t str_len);
21 
22 /**
23  * Returns the number of characters in the given string.
24  *
25  * This function also counts invalid data: any byte that is not part
26  * of a valid UTF-8 codepoint is counted as one character toward the
27  * length in characters.
28  *
29  * In other words:
30  *   0x24 (U+0024 "$") has length 1
31  *   0xc2 0xa2 (U+00A2 "¢") has length 1
32  *   0x24 0xc2 0xa2 (U+0024 U+00A2 "$¢") has length 2
33  *   0xf0 0x90 0x8d 0x88 (U+10348 "𐍈") has length 1
34  *   0x24 0xc0 0xc1 0x34 (U+0024 <invalid> <invalid> "4") has length 4
35  *
36  * @param str string to scan
37  * @param str_len size of the string
38  * @return length in characters of the string
39  */
40 extern size_t git_utf8_char_length(const char *str, size_t str_len);
41 
42 /**
43  * Iterates through a UTF-8 string, stopping once an invalid UTF-8
44  * codepoint is found.
45  *
46  * @param str string to scan
47  * @param str_len size of the string
48  * @return length in bytes of the string that contains valid data
49  */
50 extern size_t git_utf8_valid_buf_length(const char *str, size_t str_len);
51 
52 #endif
53