1 /* ScummVM - Graphic Adventure Engine 2 * 3 * ScummVM is the legal property of its developers, whose names 4 * are too numerous to list here. Please refer to the COPYRIGHT 5 * file distributed with this source distribution. 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 2 10 * of the License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef COMMON_USTR_H 24 #define COMMON_USTR_H 25 26 #include "common/scummsys.h" 27 #include "common/str-enc.h" 28 #include "common/base-str.h" 29 30 namespace Common { 31 32 /** 33 * @defgroup common_ustr UTF-32 strings 34 * @ingroup common 35 * 36 * @brief API for working with UTF-32 strings. 37 * 38 * @{ 39 */ 40 41 class String; 42 43 /** 44 * A simple string class for UTF-32 strings in ScummVM. The main intention 45 * behind this class is to feature a simple way of displaying UTF-32 strings 46 * through the Graphics::Font API. 47 * 48 * Note that operations like equals, deleteCharacter, toUppercase, etc. 49 * are only simplified convenience operations. They might not fully work 50 * as you would expect for a proper UTF-32 string class. 51 * 52 * The presence of \0 characters in the string will cause undefined 53 * behavior in some operations. 54 */ 55 #ifdef USE_CXX11 56 typedef char32_t u32char_type_t; 57 #else 58 typedef uint32 u32char_type_t; 59 #endif 60 61 class U32String : public BaseString<u32char_type_t> { 62 public: 63 typedef uint32 unsigned_type; /*!< Unsigned version of the underlying type. */ 64 public: 65 /** Construct a new empty string. */ U32String()66 U32String() : BaseString<u32char_type_t>() {} 67 68 /** Construct a new string from the given null-terminated C string. */ U32String(const value_type * str)69 explicit U32String(const value_type *str) : BaseString<u32char_type_t>(str) {} 70 71 /** Construct a new string containing exactly @p len characters read from address @p str. */ U32String(const value_type * str,uint32 len)72 U32String(const value_type *str, uint32 len) : BaseString<u32char_type_t>(str, len) {} 73 74 #ifdef USE_CXX11 U32String(const uint32 * str)75 explicit U32String(const uint32 *str) : BaseString<u32char_type_t>((const value_type *) str) {} U32String(const uint32 * str,uint32 len)76 U32String(const uint32 *str, uint32 len) : BaseString<u32char_type_t>((const value_type *) str, len) {} U32String(const uint32 * beginP,const uint32 * endP)77 U32String(const uint32 *beginP, const uint32 *endP) : BaseString<u32char_type_t>((const value_type *) beginP, (const value_type *) endP) {} 78 #endif 79 80 /** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */ U32String(const value_type * beginP,const value_type * endP)81 U32String(const value_type *beginP, const value_type *endP) : BaseString<u32char_type_t>(beginP, endP) {} 82 83 /** Construct a copy of the given string. */ U32String(const U32String & str)84 U32String(const U32String &str) : BaseString<u32char_type_t>(str) {} 85 86 /** Construct a new string from the given null-terminated C string that uses the given @p page encoding. */ 87 explicit U32String(const char *str, CodePage page = kUtf8); 88 89 /** Construct a new string containing exactly @p len characters read from address @p str. */ 90 U32String(const char *str, uint32 len, CodePage page = kUtf8); 91 92 /** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */ 93 U32String(const char *beginP, const char *endP, CodePage page = kUtf8); 94 95 /** Construct a copy of the given string. */ 96 U32String(const String &str, CodePage page = kUtf8); 97 98 /** Construct a string consisting of the given character. */ 99 explicit U32String(value_type c); 100 101 /** Assign a given string to this string. */ 102 U32String &operator=(const U32String &str); 103 104 /** @overload */ 105 U32String &operator=(const String &str); 106 107 /** @overload */ 108 U32String &operator=(const value_type *str); 109 110 /** @overload */ 111 U32String &operator=(const char *str); 112 113 /** Append the given string to this string. */ 114 U32String &operator+=(const U32String &str); 115 116 /** @overload */ 117 U32String &operator+=(value_type c); 118 119 using BaseString<value_type>::operator==; 120 using BaseString<value_type>::operator!=; 121 122 /** Check whether this string is identical to string @p x. */ 123 bool operator==(const String &x) const; 124 125 /** @overload */ 126 bool operator==(const char *x) const; 127 128 /** Check whether this string is different than string @p x. */ 129 bool operator!=(const String &x) const; 130 131 /** @overload */ 132 bool operator!=(const char *x) const; 133 134 /** Convert the string to the given @p page encoding and return the result as a new String. */ 135 String encode(CodePage page = kUtf8) const; 136 137 /** 138 * Print formatted data into a U32String object. 139 * 140 * Similar to sprintf, except that it stores the result 141 * in a (variably sized) string instead of a fixed-size buffer. 142 */ 143 static U32String format(U32String fmt, ...); 144 145 /** @overload **/ 146 static U32String format(const char *fmt, ...); 147 148 /** 149 * Print formatted data into a U32String object. 150 * The method takes in the output by reference and works with iterators. 151 */ 152 static int vformat(const value_type *fmt, const value_type *fmtEnd, U32String &output, va_list args); 153 154 /** 155 * Helper function for vformat. Convert an int to string. 156 * Minimal implementation, only for base 10. 157 */ 158 static char* itoa(int num, char* str, int base); 159 160 using BaseString<value_type>::insertString; 161 void insertString(const char *s, uint32 p, CodePage page = kUtf8); /*!< Insert string @p s into this string at position @p p. */ 162 void insertString(const String &s, uint32 p, CodePage page = kUtf8); /*!< @overload */ 163 164 /** Return a substring of this string */ 165 U32String substr(size_t pos = 0, size_t len = npos) const; 166 u32_str()167 const uint32 *u32_str() const { /*!< Return the string as a UTF-32 pointer. */ 168 return (const uint32 *) _str; 169 } 170 171 /** Decode a big endian UTF-16 string into a U32String. */ 172 static Common::U32String decodeUTF16BE(const uint16 *start, uint len); 173 174 /** Decode a little endian UTF-16 string into a U32String. */ 175 static Common::U32String decodeUTF16LE(const uint16 *start, uint len); 176 177 /** Decode a native UTF-16 string into a U32String. */ 178 static Common::U32String decodeUTF16Native(const uint16 *start, uint len); 179 180 /** Transform a U32String into UTF-16 representation (big endian). The result must be freed. */ 181 uint16 *encodeUTF16BE(uint *len = nullptr) const; 182 183 /** Transform a U32String into UTF-16 representation (native endian). The result must be freed. */ 184 uint16 *encodeUTF16LE(uint *len = nullptr) const; 185 186 /** Transform a U32String into UTF-16 representation (native encoding). The result must be freed. */ 187 uint16 *encodeUTF16Native(uint *len = nullptr) const; 188 189 private: 190 void decodeInternal(const char *str, uint32 len, CodePage page); 191 void decodeOneByte(const char *str, uint32 len, CodePage page); 192 void decodeWindows932(const char *src, uint32 len); 193 void decodeWindows949(const char *src, uint32 len); 194 void decodeWindows950(const char *src, uint32 len); 195 void decodeUTF8(const char *str, uint32 len); 196 197 friend class String; 198 }; 199 200 /** Concatenate strings @p x and @p y. */ 201 U32String operator+(const U32String &x, const U32String &y); 202 203 /** Append the given @p y character to the given @p x string. */ 204 U32String operator+(const U32String &x, U32String::value_type y); 205 206 /** 207 * Converts string with all non-printable characters properly escaped 208 * with use of C++ escape sequences. 209 * Unlike the String version, this does not escape characters with 210 * codepoints > 127. 211 * 212 * @param src The source string. 213 * @param keepNewLines Whether keep newlines or convert them to '\n', default: true. 214 * @return The converted string. 215 */ 216 U32String toPrintable(const U32String &src, bool keepNewLines = true); 217 218 /** @} */ 219 220 } // End of namespace Common 221 222 #endif 223