#pragma once

#include <cstddef>
#include <string>

/**
 * Helpers for inspecting, sanitizing and converting UTF-8 encoded text.
 */
namespace utils
{
  /**
   * Return the size, in bytes, of the next UTF-8 codepoint, based on
   * the given char.
   *
   * @param c the char used to determine the size (presumably the first
   *          byte of the encoded codepoint -- confirm against the
   *          implementation).
   * @return the number of bytes occupied by the codepoint.
   */
  std::size_t get_next_codepoint_size(const unsigned char c);

  /**
   * Return true if the given null-terminated string is valid UTF-8.
   *
   * Based on http://en.wikipedia.org/wiki/UTF-8#Description
   *
   * @param s a null-terminated string (presumably must not be a null
   *          pointer -- confirm against the implementation).
   */
  bool is_valid_utf8(const char* s);

  /**
   * Remove all invalid codepoints from the given UTF-8 encoded string.
   * The argument is left untouched: the value returned is a copy of the
   * string, without the removed chars.
   *
   * See http://www.w3.org/TR/xml/#charsets for the list of valid characters
   * in XML.
   *
   * @param original the UTF-8 encoded string to filter.
   * @return a copy of `original` without the invalid chars.
   */
  std::string remove_invalid_xml_chars(const std::string& original);

  /**
   * Convert the given string (encoded in `charset`) into valid UTF-8.
   * If some decoding fails, a UTF-8 placeholder character is inserted
   * instead.
   *
   * @param str     the string to convert.
   * @param charset the name of the encoding `str` is currently in.
   * @return the converted, UTF-8 encoded string.
   */
  std::string convert_to_utf8(const std::string& str, const char* charset);
}

/**
 * In-place decoding and encoding of strings.
 *
 * NOTE(review): the namespace name suggests XEP-0106 (JID Escaping);
 * confirm against the implementation.
 */
namespace xep0106
{
  /**
   * Decode the given string in place: the argument itself is modified.
   */
  void decode(std::string&);

  /**
   * Encode the given string in place: the argument itself is modified.
   */
  void encode(std::string&);
}