1 #pragma once
2 
3 
4 #include <string>
5 
namespace utils
{
  /**
   * Return the size, in bytes, of the next UTF-8 codepoint, based on
   * the given char.
   *
   * @param c The char the size is derived from.
   *          NOTE(review): presumably the first byte of the encoded
   *          sequence; the value returned for a byte that cannot start a
   *          sequence is decided by the implementation, which is not
   *          visible from this header.
   * @return The number of bytes occupied by the codepoint.
   */
  std::size_t get_next_codepoint_size(unsigned char c);
  /**
   * Tell whether the given null-terminated string is valid UTF-8.
   *
   * Based on http://en.wikipedia.org/wiki/UTF-8#Description
   *
   * @param s The null-terminated string to check.
   *          NOTE(review): nothing in this header indicates that a null
   *          pointer is accepted; callers should pass a real string.
   * @return true if the string is valid UTF-8, false otherwise.
   */
  bool is_valid_utf8(const char* s);
  /**
   * Remove all invalid codepoints from the given UTF-8-encoded string.
   *
   * See http://www.w3.org/TR/xml/#charsets for the list of valid characters
   * in XML.
   *
   * @param original The UTF-8-encoded string to filter; it is taken by
   *                 const reference and left untouched.
   * @return A copy of the string, without the removed chars.
   */
  std::string remove_invalid_xml_chars(const std::string& original);
  /**
   * Convert the given string (encoded in "charset") into valid UTF-8.
   * If some decoding fails, a UTF-8 placeholder character is inserted
   * instead.
   *
   * @param str     The string to convert.
   * @param charset The name of the encoding "str" is currently in.
   * @return The converted string.
   */
  std::string convert_to_utf8(const std::string& str, const char* charset);
}
33 
namespace xep0106
{
  /**
   * Decode the given string in place: the argument is both the input and
   * the output, and nothing is returned.
   *
   * NOTE(review): judging by the namespace name, this presumably reverses
   * the escaping described by XEP-0106 (JID Escaping); confirm against the
   * implementation.
   *
   * @param s The string to decode; it is modified directly.
   */
  void decode(std::string& s);
  /**
   * Encode the given string in place: the argument is both the input and
   * the output, and nothing is returned.
   *
   * NOTE(review): judging by the namespace name, this presumably applies
   * the escaping described by XEP-0106 (JID Escaping); confirm against the
   * implementation.
   *
   * @param s The string to encode; it is modified directly.
   */
  void encode(std::string& s);
}
42 
43 
44