/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */
22 
23 #ifndef COMMON_USTR_H
24 #define COMMON_USTR_H
25 
26 #include "common/scummsys.h"
27 #include "common/str-enc.h"
28 #include "common/base-str.h"
29 
30 namespace Common {
31 
/**
 * @defgroup common_ustr UTF-32 strings
 * @ingroup common
 *
 * @brief API for working with UTF-32 strings.
 *
 * @{
 */
40 
// Forward declaration: this header only names String in declarations
// (parameters, return types, a friend declaration), so the full class
// definition is not needed here.
class String;
42 
43 /**
44  * A simple string class for UTF-32 strings in ScummVM. The main intention
45  * behind this class is to feature a simple way of displaying UTF-32 strings
46  * through the Graphics::Font API.
47  *
48  * Note that operations like equals, deleteCharacter, toUppercase, etc.
49  * are only simplified convenience operations. They might not fully work
50  * as you would expect for a proper UTF-32 string class.
51  *
52  * The presence of \0 characters in the string will cause undefined
53  * behavior in some operations.
54  */
55 #ifdef USE_CXX11
56 typedef char32_t u32char_type_t;
57 #else
58 typedef uint32 u32char_type_t;
59 #endif
60 
61 class U32String : public BaseString<u32char_type_t> {
62 public:
63 	typedef uint32 unsigned_type; /*!< Unsigned version of the underlying type. */
64 public:
65 	/** Construct a new empty string. */
U32String()66 	U32String() : BaseString<u32char_type_t>() {}
67 
68 	/** Construct a new string from the given null-terminated C string. */
U32String(const value_type * str)69 	explicit U32String(const value_type *str) : BaseString<u32char_type_t>(str) {}
70 
71 	/** Construct a new string containing exactly @p len characters read from address @p str. */
U32String(const value_type * str,uint32 len)72 	U32String(const value_type *str, uint32 len) : BaseString<u32char_type_t>(str, len) {}
73 
74 #ifdef USE_CXX11
U32String(const uint32 * str)75 	explicit U32String(const uint32 *str) : BaseString<u32char_type_t>((const value_type *) str) {}
U32String(const uint32 * str,uint32 len)76 	U32String(const uint32 *str, uint32 len) : BaseString<u32char_type_t>((const value_type *) str, len) {}
U32String(const uint32 * beginP,const uint32 * endP)77 	U32String(const uint32 *beginP, const uint32 *endP) : BaseString<u32char_type_t>((const value_type *) beginP, (const value_type *) endP) {}
78 #endif
79 
80 	/** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */
U32String(const value_type * beginP,const value_type * endP)81 	U32String(const value_type *beginP, const value_type *endP) : BaseString<u32char_type_t>(beginP, endP) {}
82 
83 	/** Construct a copy of the given string. */
U32String(const U32String & str)84 	U32String(const U32String &str) : BaseString<u32char_type_t>(str) {}
85 
86 	/** Construct a new string from the given null-terminated C string that uses the given @p page encoding. */
87 	explicit U32String(const char *str, CodePage page = kUtf8);
88 
89 	/** Construct a new string containing exactly @p len characters read from address @p str. */
90 	U32String(const char *str, uint32 len, CodePage page = kUtf8);
91 
92 	/** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */
93 	U32String(const char *beginP, const char *endP, CodePage page = kUtf8);
94 
95 	/** Construct a copy of the given string. */
96 	U32String(const String &str, CodePage page = kUtf8);
97 
98 	/** Construct a string consisting of the given character. */
99 	explicit U32String(value_type c);
100 
101 	/** Assign a given string to this string. */
102 	U32String &operator=(const U32String &str);
103 
104 	/** @overload */
105 	U32String &operator=(const String &str);
106 
107 	/** @overload */
108 	U32String &operator=(const value_type *str);
109 
110 	/** @overload */
111 	U32String &operator=(const char *str);
112 
113 	/** Append the given string to this string. */
114 	U32String &operator+=(const U32String &str);
115 
116 	/** @overload */
117 	U32String &operator+=(value_type c);
118 
119 	using BaseString<value_type>::operator==;
120 	using BaseString<value_type>::operator!=;
121 
122 	/** Check whether this string is identical to string @p x. */
123 	bool operator==(const String &x) const;
124 
125 	/** @overload */
126 	bool operator==(const char *x) const;
127 
128 	/** Check whether this string is different than string @p x. */
129 	bool operator!=(const String &x) const;
130 
131 	/** @overload */
132 	bool operator!=(const char *x) const;
133 
134 	/** Convert the string to the given @p page encoding and return the result as a new String. */
135 	String encode(CodePage page = kUtf8) const;
136 
137 	/**
138 	 * Print formatted data into a U32String object.
139 	 *
140 	 * Similar to sprintf, except that it stores the result
141 	 * in a (variably sized) string instead of a fixed-size buffer.
142 	 */
143 	static U32String format(U32String fmt, ...);
144 
145 	/** @overload **/
146 	static U32String format(const char *fmt, ...);
147 
148 	/**
149 	 * Print formatted data into a U32String object.
150 	 * The method takes in the output by reference and works with iterators.
151 	 */
152 	static int vformat(const value_type *fmt, const value_type *fmtEnd, U32String &output, va_list args);
153 
154 	/**
155 	 * Helper function for vformat. Convert an int to string.
156 	 * Minimal implementation, only for base 10.
157 	 */
158 	static char* itoa(int num, char* str, int base);
159 
160 	using BaseString<value_type>::insertString;
161 	void insertString(const char *s, uint32 p, CodePage page = kUtf8);   /*!< Insert string @p s into this string at position @p p. */
162 	void insertString(const String &s, uint32 p, CodePage page = kUtf8); /*!< @overload */
163 
164 	/** Return a substring of this string */
165 	U32String substr(size_t pos = 0, size_t len = npos) const;
166 
u32_str()167 	const uint32 *u32_str() const {   /*!< Return the string as a UTF-32 pointer. */
168 		return (const uint32 *) _str;
169 	}
170 
171 	/** Decode a big endian UTF-16 string into a U32String. */
172 	static Common::U32String decodeUTF16BE(const uint16 *start, uint len);
173 
174 	/** Decode a little endian UTF-16 string into a U32String. */
175 	static Common::U32String decodeUTF16LE(const uint16 *start, uint len);
176 
177 	/** Decode a native UTF-16 string into a U32String. */
178 	static Common::U32String decodeUTF16Native(const uint16 *start, uint len);
179 
180 	/** Transform a U32String into UTF-16 representation (big endian). The result must be freed. */
181 	uint16 *encodeUTF16BE(uint *len = nullptr) const;
182 
183 	/** Transform a U32String into UTF-16 representation (native endian). The result must be freed. */
184 	uint16 *encodeUTF16LE(uint *len = nullptr) const;
185 
186 	/** Transform a U32String into UTF-16 representation (native encoding). The result must be freed. */
187 	uint16 *encodeUTF16Native(uint *len = nullptr) const;
188 
189 private:
190 	void decodeInternal(const char *str, uint32 len, CodePage page);
191 	void decodeOneByte(const char *str, uint32 len, CodePage page);
192 	void decodeWindows932(const char *src, uint32 len);
193 	void decodeWindows949(const char *src, uint32 len);
194 	void decodeWindows950(const char *src, uint32 len);
195 	void decodeUTF8(const char *str, uint32 len);
196 
197 	friend class String;
198 };
199 
200 /** Concatenate strings @p x and @p y. */
201 U32String operator+(const U32String &x, const U32String &y);
202 
203 /** Append the given @p y character to the given @p x string. */
204 U32String operator+(const U32String &x, U32String::value_type y);
205 
206 /**
207  * Converts string with all non-printable characters properly escaped
208  * with use of C++ escape sequences.
209  * Unlike the String version, this does not escape characters with
210  * codepoints > 127.
211  *
212  * @param src The source string.
213  * @param keepNewLines Whether keep newlines or convert them to '\n', default: true.
214  * @return The converted string.
215  */
216 U32String toPrintable(const U32String &src, bool keepNewLines = true);
217 
/** @} */
219 
220 } // End of namespace Common
221 
222 #endif
223