1 // Modified from OpenGUI under lenient license
2 // Original copyright details and licensing below:
3 // OpenGUI (http://opengui.sourceforge.net)
4 // This source code is released under the BSD License
5 
6 // Permission is given to the Ogre project to use the contents of file within its
7 // source and binary applications, as well as any derivative works, in accordance
8 // with the terms of any license under which Ogre is or will be distributed.
9 //
10 // Ogre may relicense its copy of this file, as well as any OpenGUI released updates
11 // to this file, under any terms that it deems fit, and is not required to maintain
12 // the original BSD licensing terms of this file, however OpenGUI retains the right
13 // to present its copy of this file under the terms of any license under which
14 // OpenGUI is distributed.
15 //
16 // Ogre is not required to release to OpenGUI any future changes that it makes to
17 // this file, and understands and agrees that any such changes that are released
18 // back to OpenGUI will become available under the terms of any license under which
19 // OpenGUI is distributed.
20 //
21 // For brevity, this permission text may be removed from this file if desired.
22 // The original record kept within the SourceForge (http://sourceforge.net/) tracker
23 // is sufficient.
24 //
25 // - Eric Shorkey (zero/zeroskill) <opengui@rightbracket.com> [January 20th, 2007]
26 
27 #ifndef __OGRE_UTFSTRING_H__
28 #define __OGRE_UTFSTRING_H__
29 
30 
31 #include "OgrePrerequisites.h"
32 #include "OgreHeaderPrefix.h"
33 
34 #if OGRE_UNICODE_SUPPORT
35 
36 // these are explained later
37 #include <iterator>
38 #include <string>
39 #include <stdexcept>
40 
41 // Workaround for VC7/7.1/8.0/9.0 (2003 - 2008):
42 //      when building with /MD or /MDd, VC has both std::basic_string<unsigned short> and
43 // basic_string<__wchar_t> instantiated in msvcprt[d].lib/MSVCP71[D].dll, but the header
44 // files tell the compiler that only one of them is present there (based on the /Zc:wchar_t
45 // compile option). Since this file uses both of them, the compiler would instantiate the other
46 // one in user object code, which leads to duplicate symbols with msvcprt.lib/MSVCP71[D].dll.
47 //
48 #if OGRE_COMPILER == OGRE_COMPILER_MSVC && (OGRE_COMP_VER >= 1300 && OGRE_COMP_VER < 1600)
49 
50 # if defined(_DLL_CPPLIB)
51 
52 namespace std
53 {
    // Both specializations are declared with _CRTIMP2 so that neither is
    // instantiated a second time in user object code.
54     template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>,
55 	    allocator<unsigned short> >;
56 
57     template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>,
58 	    allocator<__wchar_t> >;
59 }
60 
61 # endif // defined(_DLL_CPPLIB)
62 
63 #endif  // OGRE_COMPILER == OGRE_COMPILER_MSVC && (OGRE_COMP_VER >= 1300 && OGRE_COMP_VER < 1600)
64 
65 
66 namespace Ogre {
67 	/** \addtogroup Core
68 	*  @{
69 	*/
70 	/** \addtogroup Overlays
71 	*  @{
72 	*/
73 
74 	/* READ THIS NOTICE BEFORE USING IN YOUR OWN APPLICATIONS
75 	=NOTICE=
76 	This class is not a complete Unicode solution. It purposefully does not
77 	provide certain functionality, such as proper lexical sorting for
78 	Unicode values. It does provide comparison operators for the sole purpose
79 	of using UTFString as an index with std::map and other operator< sorted
80 	containers, but it should NOT be relied upon for meaningful lexical
81 	operations, such as alphabetical sorts. If you need this type of
82 	functionality, look into using ICU instead (http://icu.sourceforge.net/).
83 
84 	=REQUIREMENTS=
85 	There are a few requirements for proper operation. They are fairly small,
86 	and shouldn't restrict usage on any reasonable target.
87 	* Compiler must support unsigned 16-bit integer types
88 	* Compiler must support signed 32-bit integer types
89 	* wchar_t must be either UTF-16 or UTF-32 encoding, and specified as such
90 	    using the WCHAR_UTF16 macro as outlined below.
91 	* You must include <iterator>, <string>, and <cwchar>. Probably more, but
92 	    these are the most obvious.
93 
94 	=REQUIRED PREPROCESSOR MACROS=
95 	This class requires two preprocessor macros to be defined in order to
96 	work as advertised.
97 	INT32 - must be mapped to a signed 32 bit integer (ex. #define INT32 int)
98 	UINT16 - must be mapped to an unsigned 16 bit integer (ex. #define UINT16 unsigned short)
99 
100 	Additionally, a third macro should be defined to control the evaluation of wchar_t:
101 	WCHAR_UTF16 - should be defined when wchar_t represents UTF-16 code points,
102 	    such as in Windows. Otherwise it is assumed that wchar_t is a 32-bit
103 		integer representing UTF-32 code points.
104 	*/
105 
106 	// THIS IS A VERY BRIEF AUTO DETECTION. YOU MAY NEED TO TWEAK THIS
// Result: WCHAR_UTF16 ends up defined when wchar_t is taken to carry UTF-16;
// when it is left undefined, wchar_t is assumed to be a 32-bit UTF-32 value.
107 #ifdef __STDC_ISO_10646__
108 // for any compiler that provides this, wchar_t is guaranteed to hold any Unicode value with a single code point (32-bit or larger)
109 // so we can safely skip the rest of the testing
110 #else // #ifdef __STDC_ISO_10646__
// Only Windows is assumed to be UTF-16 outright. Every other target (Android
// included) is classified by the WCHAR_MAX test below, so platforms with a
// 32-bit wchar_t are no longer forced into the UTF-16 path.
111 #if defined( __WIN32__ ) || defined( _WIN32 )
112 #define WCHAR_UTF16 // All currently known Windows platforms utilize UTF-16 encoding in wchar_t
113 #else // #if defined( __WIN32__ ) || defined( _WIN32 )
114 #if OGRE_COMPILER != OGRE_COMPILER_GCCE
// NOTE(review): this header does not include <wchar.h>/<cwchar> itself; if
// WCHAR_MAX is not defined here the test below evaluates it as 0 and selects
// UTF-16 -- confirm that the header is pulled in transitively on every target.
115 #if WCHAR_MAX <= 0xFFFF // this is a last resort fall back test; WCHAR_MAX is defined in <wchar.h>
116 #define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit
117 #endif // #if WCHAR_MAX <= 0xFFFF
118 #endif // #if OGRE_COMPILER != OGRE_COMPILER_GCCE
119 #endif // #if defined( __WIN32__ ) || defined( _WIN32 )
120 #endif // #ifdef __STDC_ISO_10646__
121 
122 
123 // OGRE_IS_NATIVE_WCHAR_T is 1 when wchar_t is a distinct built-in type, i.e. it
124 // isn't a typedef of uint16 or uint32; otherwise it is 0.
125 #if OGRE_COMPILER == OGRE_COMPILER_MSVC
126 
127 // Don't declare the wchar_t overloads when compiling without /Zc:wchar_t:
128 // they would duplicate the UTFString::code_point overloads,
129 // because in that case both types are
130 // a typedef of uint16.
131 # if defined(_NATIVE_WCHAR_T_DEFINED)
132 #   define OGRE_IS_NATIVE_WCHAR_T      1
133 # else
134 #   define OGRE_IS_NATIVE_WCHAR_T      0
135 # endif
136 #else   // OGRE_COMPILER != OGRE_COMPILER_MSVC
137 
138 // wchar_t is assumed to be a native type on all other compilers
139 #   define OGRE_IS_NATIVE_WCHAR_T     1
140 
141 #endif  // OGRE_COMPILER == OGRE_COMPILER_MSVC
142 
143 	//! A UTF-16 string with implicit conversion to/from std::string and std::wstring
144 	/*! This class provides a complete 1 to 1 map of most std::string functions (at least to my
145 	knowledge). Implicit conversions allow this string class to work with all common C++ string
146 	formats, with specialty functions defined where implicit conversion would cause potential
147 	problems or is otherwise unavailable.
148 
149 	Some additional functionality is present to assist in working with characters using the
150 	32-bit UTF-32 encoding. (Which is guaranteed to fit any Unicode character into a single
151 	code point.) \b Note: Reverse iterators do not have this functionality due to the
152 	ambiguity that surrounds working with UTF-16 in reverse. (Such as, where should an
153 	iterator point to represent the beginning of a surrogate pair?)
154 
155 
156 	\par Supported Input Types
157 	The supported string types for input, and their assumed encoding schemes, are:
158 	- std::string (UTF-8)
159 	- char* (UTF-8)
160 	- std::wstring (autodetected UTF-16 / UTF-32 based on compiler)
161 	- wchar_t* (autodetected UTF-16 / UTF-32 based on compiler)
162 
163 
164 	\see
165 	- For additional information on UTF-16 encoding: http://en.wikipedia.org/wiki/UTF-16
166 	- For additional information on UTF-8 encoding: http://en.wikipedia.org/wiki/UTF-8
167 	- For additional information on UTF-32 encoding: http://en.wikipedia.org/wiki/UTF-32
168 	*/
169 	class _OgreExport UTFString {
170 		// constants used in UTF-8 conversions
		// Each value is the fixed high-bit pattern shown in its trailing comment; the
		// matching *_mask keeps the bits marked 'x' in that pattern.
		// (presumably the N in _leadN is the number of continuation bytes that follow
		// the lead byte -- confirm against the conversion code)
171 		static const unsigned char _lead1 = 0xC0;      //110xxxxx
172 		static const unsigned char _lead1_mask = 0x1F; //00011111
173 		static const unsigned char _lead2 = 0xE0;      //1110xxxx
174 		static const unsigned char _lead2_mask = 0x0F; //00001111
175 		static const unsigned char _lead3 = 0xF0;      //11110xxx
176 		static const unsigned char _lead3_mask = 0x07; //00000111
177 		static const unsigned char _lead4 = 0xF8;      //111110xx
178 		static const unsigned char _lead4_mask = 0x03; //00000011
179 		static const unsigned char _lead5 = 0xFC;      //1111110x
180 		static const unsigned char _lead5_mask = 0x01; //00000001
181 		static const unsigned char _cont = 0x80;       //10xxxxxx
182 		static const unsigned char _cont_mask = 0x3F;  //00111111
183 
184 	public:
185 		//! size type used to indicate string size and character positions within the string
186 		typedef size_t size_type;
187 		//! the usual constant representing: not found, no limit, etc (~0 converted to size_type)
188 		static const size_type npos = static_cast<size_type>(~0);
189 
190 		//! a single 32-bit Unicode character
191 		typedef uint32 unicode_char;
192 
193 		//! a single UTF-16 code point (one 16-bit element of the string; a surrogate pair takes two)
194 		typedef uint16 code_point;
195 
196 		//! value type typedef for use in iterators
197 		typedef code_point value_type;
198 
		//! string of code_point elements; the iterator classes hold a dstring::iterator
199 		typedef std::basic_string<code_point> dstring; // data string
200 
201 		//! string type used for returning UTF-32 formatted data
202 		typedef std::basic_string<unicode_char> utf32string;
203 
204 		//! This exception is used when invalid data streams are encountered
		/*! Derives from std::runtime_error, so it can be caught as either type. */
205 	class _OgreExport invalid_data: public std::runtime_error { /* i don't know why the beautifier is freaking out on this line */
206 		public:
207 			//! constructor takes a string message that can be later retrieved by the what() function
208 			explicit invalid_data( const std::string& _Message ): std::runtime_error( _Message ) {
209 				/* intentionally empty: the message is forwarded to std::runtime_error */
210 			}
211 		};
212 
213 		//#########################################################################
214 		//! base iterator class for UTFString
		/*! Holds the state shared by all four iterator classes: a position inside a
		dstring (mIter) and a pointer to a UTFString (mString). The constructor and
		every helper are protected, so the class is only usable through the derived
		iterators and through UTFString, which is declared a friend.
		NOTE(review): std::iterator is deprecated as of C++17; it is used here to
		supply the nested iterator typedefs (e.g. difference_type) to the derived classes. */
215 	class _OgreExport _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type> { /* i don't know why the beautifier is freaking out on this line */
216 			friend class UTFString;
217 		protected:
218 			_base_iterator();
219 
			// Positioning helpers. Their exact semantics are defined outside this
			// header; presumably _seekFwd/_seekRev move by \a c code points and
			// _jump_to/_get_index work with an index into the string -- confirm
			// against the implementation.
220 			void _seekFwd( size_type c );
221 			void _seekRev( size_type c );
222 			void _become( const _base_iterator& i );
223 			bool _test_begin() const;
224 			bool _test_end() const;
225 			size_type _get_index() const;
226 			void _jump_to( size_type index );
227 
			// Whole-character (UTF-32) access; exposed publicly by the forward
			// iterators as getCharacter() / setCharacter().
228 			unicode_char _getCharacter() const;
229 			int _setCharacter( unicode_char uc );
230 
			// Character-wise stepping; exposed publicly by the forward iterators
			// as moveNext() / movePrev().
231 			void _moveNext();
232 			void _movePrev();
233 
234 			dstring::iterator mIter; // current position within a dstring
235 			UTFString* mString;      // string associated with this iterator (non-const pointer even for the const iterators)
236 		};
237 
238 		//#########################################################################
239 		// FORWARD ITERATORS
240 		//#########################################################################
241 		class _const_fwd_iterator; // forward declaration (named as a friend of _fwd_iterator below)
242 
243 		//! forward iterator for UTFString
		/*! Dereferences to a mutable code point (value_type&). In addition to the
		usual increment/decrement/offset operators, which are expressed in code points,
		it offers moveNext()/movePrev()/getCharacter()/setCharacter() for working
		in whole Unicode characters.
		NOTE(review): operator+ and operator- are declared non-const, so they cannot
		be called on a const iterator object. */
244 	class _OgreExport _fwd_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
245 			friend class _const_fwd_iterator;
246 		public:
247 			_fwd_iterator();
248 			_fwd_iterator( const _fwd_iterator& i );
249 
250 			//! pre-increment
251 			_fwd_iterator& operator++();
252 			//! post-increment
253 			_fwd_iterator operator++( int );
254 
255 			//! pre-decrement
256 			_fwd_iterator& operator--();
257 			//! post-decrement
258 			_fwd_iterator operator--( int );
259 
260 			//! addition operator
261 			_fwd_iterator operator+( difference_type n );
262 			//! subtraction operator
263 			_fwd_iterator operator-( difference_type n );
264 
265 			//! addition assignment operator
266 			_fwd_iterator& operator+=( difference_type n );
267 			//! subtraction assignment operator
268 			_fwd_iterator& operator-=( difference_type n );
269 
270 			//! dereference operator
271 			value_type& operator*() const;
272 
273 			//! dereference at offset operator
274 			value_type& operator[]( difference_type n ) const;
275 
276 			//! advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
277 			_fwd_iterator& moveNext();
278 			//! rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
279 			_fwd_iterator& movePrev();
280 			//! Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed)
281 			unicode_char getCharacter() const;
282 			//! Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); returns the amount of string length change caused by the operation
283 			int setCharacter( unicode_char uc );
284 		};
285 
286 
287 
288 		//#########################################################################
289 		//! const forward iterator for UTFString
		/*! Same interface as _fwd_iterator except that dereferencing yields
		const value_type& and there is no setCharacter(). It can be constructed from a
		_fwd_iterator. The comparison and difference operators for forward iterators
		are declared here as friends taking two _const_fwd_iterator arguments.
		NOTE(review): the difference operator returns size_type, which is unsigned,
		so a negative distance cannot be represented. */
290 	class _OgreExport _const_fwd_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
291 		public:
292 			_const_fwd_iterator();
293 			_const_fwd_iterator( const _const_fwd_iterator& i );
			//! converting constructor from the mutable forward iterator
294 			_const_fwd_iterator( const _fwd_iterator& i );
295 
296 			//! pre-increment
297 			_const_fwd_iterator& operator++();
298 			//! post-increment
299 			_const_fwd_iterator operator++( int );
300 
301 			//! pre-decrement
302 			_const_fwd_iterator& operator--();
303 			//! post-decrement
304 			_const_fwd_iterator operator--( int );
305 
306 			//! addition operator
307 			_const_fwd_iterator operator+( difference_type n );
308 			//! subtraction operator
309 			_const_fwd_iterator operator-( difference_type n );
310 
311 			//! addition assignment operator
312 			_const_fwd_iterator& operator+=( difference_type n );
313 			//! subtraction assignment operator
314 			_const_fwd_iterator& operator-=( difference_type n );
315 
316 			//! dereference operator
317 			const value_type& operator*() const;
318 
319 			//! dereference at offset operator
320 			const value_type& operator[]( difference_type n ) const;
321 
322 			//! advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
323 			_const_fwd_iterator& moveNext();
324 			//! rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
325 			_const_fwd_iterator& movePrev();
326 			//! Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed)
327 			unicode_char getCharacter() const;
328 
329 			//! difference operator
330 			friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
331 			//! equality operator
332 			friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
333 			//! inequality operator
334 			friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
335 			//! less than
336 			friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
337 			//! less than or equal
338 			friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
339 			//! greater than
340 			friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
341 			//! greater than or equal
342 			friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
343 
344 		};
345 
346 		//#########################################################################
347 		// REVERSE ITERATORS
348 		//#########################################################################
349 		class _const_rev_iterator; // forward declaration (named as a friend of _rev_iterator below)
350 		//! reverse iterator for UTFString
		/*! Dereferences to a mutable code point (value_type&). Unlike the forward
		iterators it declares no moveNext()/movePrev()/getCharacter()/setCharacter()
		members; the class overview explains that reverse iterators omit the
		UTF-32 helpers because of the ambiguity of walking UTF-16 backwards. */
351 	class _OgreExport _rev_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
352 			friend class _const_rev_iterator;
353 		public:
354 			_rev_iterator();
355 			_rev_iterator( const _rev_iterator& i );
356 
357 			//! pre-increment
358 			_rev_iterator& operator++();
359 			//! post-increment
360 			_rev_iterator operator++( int );
361 
362 			//! pre-decrement
363 			_rev_iterator& operator--();
364 			//! post-decrement
365 			_rev_iterator operator--( int );
366 
367 			//! addition operator
368 			_rev_iterator operator+( difference_type n );
369 			//! subtraction operator
370 			_rev_iterator operator-( difference_type n );
371 
372 			//! addition assignment operator
373 			_rev_iterator& operator+=( difference_type n );
374 			//! subtraction assignment operator
375 			_rev_iterator& operator-=( difference_type n );
376 
377 			//! dereference operator
378 			value_type& operator*() const;
379 
380 			//! dereference at offset operator
381 			value_type& operator[]( difference_type n ) const;
382 		};
383 		//#########################################################################
384 		//! const reverse iterator for UTFString
		/*! Same interface as _rev_iterator except that dereferencing yields
		const value_type&. It can be constructed from a _rev_iterator. The comparison
		and difference operators for reverse iterators are declared here as friends
		taking two _const_rev_iterator arguments.
		NOTE(review): the difference operator returns size_type, which is unsigned,
		so a negative distance cannot be represented. */
385 	class _OgreExport _const_rev_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
386 		public:
387 			_const_rev_iterator();
388 			_const_rev_iterator( const _const_rev_iterator& i );
			//! converting constructor from the mutable reverse iterator
389 			_const_rev_iterator( const _rev_iterator& i );
390 			//! pre-increment
391 			_const_rev_iterator& operator++();
392 			//! post-increment
393 			_const_rev_iterator operator++( int );
394 
395 			//! pre-decrement
396 			_const_rev_iterator& operator--();
397 			//! post-decrement
398 			_const_rev_iterator operator--( int );
399 
400 			//! addition operator
401 			_const_rev_iterator operator+( difference_type n );
402 			//! subtraction operator
403 			_const_rev_iterator operator-( difference_type n );
404 
405 			//! addition assignment operator
406 			_const_rev_iterator& operator+=( difference_type n );
407 			//! subtraction assignment operator
408 			_const_rev_iterator& operator-=( difference_type n );
409 
410 			//! dereference operator
411 			const value_type& operator*() const;
412 
413 			//! dereference at offset operator
414 			const value_type& operator[]( difference_type n ) const;
415 
416 			//! difference operator
417 			friend size_type operator-( const _const_rev_iterator& left, const _const_rev_iterator& right );
418 			//! equality operator
419 			friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right );
420 			//! inequality operator
421 			friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right );
422 			//! less than
423 			friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right );
424 			//! less than or equal
425 			friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right );
426 			//! greater than
427 			friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right );
428 			//! greater than or equal
429 			friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right );
430 		};
431 		//#########################################################################
432 
433 		typedef _fwd_iterator iterator;                     //!< mutable forward iterator (dereferences to value_type&)
434 		typedef _rev_iterator reverse_iterator;             //!< mutable reverse iterator (dereferences to value_type&)
435 		typedef _const_fwd_iterator const_iterator;         //!< const forward iterator (dereferences to const value_type&)
436 		typedef _const_rev_iterator const_reverse_iterator; //!< const reverse iterator (dereferences to const value_type&)
437 
438 
439 		//!\name Constructors/Destructor
		// None of the single-argument constructors is marked explicit; the class
		// overview advertises implicit conversion from the common string types.
440 		//@{
441 		//! default constructor, creates an empty string
442 		UTFString();
443 		//! copy constructor
444 		UTFString( const UTFString& copy );
445 		//! \a length copies of \a ch
446 		UTFString( size_type length, const code_point& ch );
447 		//! duplicate of nul-terminated sequence \a str
448 		UTFString( const code_point* str );
449 		//! duplicate of \a str, \a length code points long
450 		UTFString( const code_point* str, size_type length );
451 		//! substring of \a str starting at \a index and \a length code points long
452 		UTFString( const UTFString& str, size_type index, size_type length );
		// the wchar_t pointer overloads exist only when wchar_t is a distinct type,
		// otherwise they would collide with the code_point overloads above
453 #if OGRE_IS_NATIVE_WCHAR_T
454 		//! duplicate of nul-terminated \c wchar_t array
455 		UTFString( const wchar_t* w_str );
456 		//! duplicate of \a w_str, \a length characters long
457 		UTFString( const wchar_t* w_str, size_type length );
458 #endif
459 		//! duplicate of \a wstr
460 		UTFString( const std::wstring& wstr );
461 		//! duplicate of nul-terminated C-string \a c_str (UTF-8 encoding)
462 		UTFString( const char* c_str );
463 		//! duplicate of \a c_str, \a length characters long (UTF-8 encoding)
464 		UTFString( const char* c_str, size_type length );
465 		//! duplicate of \a str (UTF-8 encoding)
466 		UTFString( const std::string& str );
467 #if OGRE_STRING_USE_CUSTOM_MEMORY_ALLOCATOR
		//! duplicate of the Ogre::String \a str (presumably UTF-8 like the std::string overload -- confirm)
468 		UTFString( const Ogre::String& str );
469 #endif
470 
471 		//! destructor
472 		~UTFString();
473 		//@}
474 
475 		//////////////////////////////////////////////////////////////////////////
476 
477 		//!\name Utility functions
478 		//@{
479 		//! Returns the number of code points in the current string
480 		size_type size() const;
481 		//! Returns the number of code points in the current string
482 		size_type length() const;
483 		//! Returns the number of Unicode characters in the string
484 		/*! Executes in linear time. */
485 		size_type length_Characters() const;
486 		//! returns the maximum number of UTF-16 code points that the string can hold
487 		size_type max_size() const;
488 		//! sets the capacity of the string to at least \a size code points
489 		void reserve( size_type size );
490 		//! changes the size of the string to \a num, filling in any new area with \a val
491 		void resize( size_type num, const code_point& val = 0 );
492 		//! exchanges the elements of the current string with those of \a from
493 		void swap( UTFString& from );
494 		//! returns \c true if the string has no elements, \c false otherwise
495 		bool empty() const;
496 		//! returns a pointer to the first character in the current string
497 		const code_point* c_str() const;
498 		//! returns a pointer to the first character in the current string
499 		const code_point* data() const;
500 		//! returns the number of elements that the string can hold before it will need to allocate more space
501 		size_type capacity() const;
502 		//! deletes all of the elements in the string
503 		void clear();
504 		//! returns a substring of the current string, starting at \a index, and \a num characters long.
505 		/*! If \a num is omitted, it will default to \c UTFString::npos, and the substr() function will simply return the remainder of the string starting at \a index. */
506 		UTFString substr( size_type index, size_type num = npos ) const;
507 		//! appends the Unicode character \a val (UTF-32 value) to the end of the string
508 		void push_back( unicode_char val );
509 #if OGRE_IS_NATIVE_WCHAR_T
510 		//! appends the \c wchar_t \a val to the end of the string
511 		void push_back( wchar_t val );
512 #endif
513 		//! appends the code point \a val to the end of the string
514 		/*! This can be used to push surrogate pair code points, you'll just need to push them
515 		one after the other. */
516 		void push_back( code_point val );
517 		//! appends the \c char \a val to the end of the string
518 		/*! Limited to characters under the 127 value barrier. */
519 		void push_back( char val );
520 		//! returns \c true if the given Unicode character \a ch is in this string
521 		bool inString( unicode_char ch ) const;
522 		//@}
523 
524 		//////////////////////////////////////////////////////////////////////////
525 
526 		//!\name Stream variations
		// NOTE(review): every accessor in this group is a const member that returns a
		// reference or pointer rather than a value. The storage being referred to is
		// not visible in this part of the header -- confirm how long the result stays
		// valid before holding on to it.
527 		//@{
528 		//! returns the current string in UTF-8 form within a std::string
529 		const std::string& asUTF8() const;
530 		//! returns the current string in UTF-8 form as a nul-terminated char array
531 		const char* asUTF8_c_str() const;
532 		//! returns the current string in UTF-32 form within a utf32string
533 		const utf32string& asUTF32() const;
534 		//! returns the current string in UTF-32 form as a nul-terminated unicode_char array
535 		const unicode_char* asUTF32_c_str() const;
536 		//! returns the current string in the native form of std::wstring
537 		const std::wstring& asWStr() const;
538 		//! returns the current string in the native form of a nul-terminated wchar_t array
539 		const wchar_t* asWStr_c_str() const;
540 		//@}
541 
542 		//////////////////////////////////////////////////////////////////////////
543 
544 		//!\name Single Character Access
545 		//@{
546 		//! returns a reference to the element in the string at index \c loc
547 		code_point& at( size_type loc );
548 		//! returns a const reference to the element in the string at index \c loc
549 		const code_point& at( size_type loc ) const;
550 		//! returns the data point \a loc evaluated as a UTF-32 value
551 		/*! This function will only properly decode surrogate pairs when \a loc points to the index
552 		of a lead code point that is followed by a trailing code point. Evaluating the trailing code point
553 		itself, or pointing to a code point that is a sentinel value (part of a broken pair) will return
554 		the value of just that code point (not a valid Unicode value, but useful as a sentinel value). */
555 		unicode_char getChar( size_type loc ) const;
556 		//! sets the value of the character at \a loc to the Unicode value \a ch (UTF-32)
557 		/*! Providing sentinel values (values between U+D800-U+DFFF) are accepted, but you should be aware
558 		that you can also unwittingly create a valid surrogate pair if you don't pay attention to what you
559 		are doing. @note This operation may also lengthen the string if a surrogate pair is needed to
560 		represent the value given, but one is not available to replace; or alternatively shorten the string
561 		if an existing surrogate pair is replaced with a character that is representable without a surrogate
562 		pair. The return value will signify any lengthening or shortening performed, returning 0 if no change
563 		was made, -1 if the string was shortened, or 1 if the string was lengthened. Any single call can
564 		only change the string length by + or - 1. */
565 		int setChar( size_type loc, unicode_char ch );
566 		//@}
567 
568 		//////////////////////////////////////////////////////////////////////////
569 
570 		//!\name iterator acquisition
		// Each accessor comes as a pair: the non-const overload returns a mutable
		// iterator, the const overload returns the corresponding const iterator.
571 		//@{
572 		//! returns an iterator to the first element of the string
573 		iterator begin();
574 		//! returns a const iterator to the first element of the string
575 		const_iterator begin() const;
576 		//! returns an iterator just past the end of the string
577 		iterator end();
578 		//! returns a const iterator just past the end of the string
579 		const_iterator end() const;
580 		//! returns a reverse iterator to the last element of the string
581 		reverse_iterator rbegin();
582 		//! returns a const reverse iterator to the last element of the string
583 		const_reverse_iterator rbegin() const;
584 		//! returns a reverse iterator just past the beginning of the string
585 		reverse_iterator rend();
586 		//! returns a const reverse iterator just past the beginning of the string
587 		const_reverse_iterator rend() const;
588 		//@}
589 
590 		//////////////////////////////////////////////////////////////////////////
591 
592 		//!\name assign
		// Every overload returns UTFString&.
593 		//@{
594 		//! gives the current string the values from \a start to \a end
595 		UTFString& assign( iterator start, iterator end );
596 		//! assign \a str to the current string
597 		UTFString& assign( const UTFString& str );
598 		//! assign the nul-terminated \a str to the current string
599 		UTFString& assign( const code_point* str );
600 		//! assign the first \a num characters of \a str to the current string
601 		UTFString& assign( const code_point* str, size_type num );
602 		//! assign \a len entries from \a str to the current string, starting at \a index
603 		UTFString& assign( const UTFString& str, size_type index, size_type len );
604 		//! assign \a num copies of \a ch to the current string
605 		UTFString& assign( size_type num, const code_point& ch );
606 		//! assign \a wstr to the current string (\a wstr is treated as a UTF-16 stream)
		// NOTE(review): the class overview lists std::wstring input as autodetected
		// UTF-16 / UTF-32, which differs from the wording above -- confirm which holds.
607 		UTFString& assign( const std::wstring& wstr );
608 #if OGRE_IS_NATIVE_WCHAR_T
609 		//! assign \a w_str to the current string
610 		UTFString& assign( const wchar_t* w_str );
611 		//! assign the first \a num characters of \a w_str to the current string
612 		UTFString& assign( const wchar_t* w_str, size_type num );
613 #endif
614 		//! assign \a str to the current string (\a str is treated as a UTF-8 stream)
615 		UTFString& assign( const std::string& str );
616 		//! assign \a c_str to the current string (\a c_str is treated as a UTF-8 stream)
617 		UTFString& assign( const char* c_str );
618 		//! assign the first \a num characters of \a c_str to the current string (\a c_str is treated as a UTF-8 stream)
619 		UTFString& assign( const char* c_str, size_type num );
620 		//@}
621 
622 		//////////////////////////////////////////////////////////////////////////
623 
624 		//!\name append
		// Every overload returns UTFString&.
625 		//@{
626 		//! appends \a str on to the end of the current string
627 		UTFString& append( const UTFString& str );
628 		//! appends \a str on to the end of the current string
629 		UTFString& append( const code_point* str );
630 		//! appends a substring of \a str starting at \a index that is \a len characters long on to the end of the current string
631 		UTFString& append( const UTFString& str, size_type index, size_type len );
632 		//! appends \a num characters of \a str on to the end of the current string
633 		UTFString& append( const code_point* str, size_type num );
634 		//! appends \a num repetitions of \a ch on to the end of the current string
635 		UTFString& append( size_type num, code_point ch );
636 		//! appends the sequence denoted by \a start and \a end on to the end of the current string
637 		UTFString& append( iterator start, iterator end );
638 #if OGRE_IS_NATIVE_WCHAR_T
639 		//! appends \a num characters of \a w_str on to the end of the current string
640 		UTFString& append( const wchar_t* w_str, size_type num );
641 		//! appends \a num repetitions of \a ch on to the end of the current string
642 		UTFString& append( size_type num, wchar_t ch );
643 #endif
644 		//! appends \a num characters of \a c_str on to the end of the current string  (UTF-8 encoding)
645 		UTFString& append( const char* c_str, size_type num );
646 		//! appends \a num repetitions of \a ch on to the end of the current string (Unicode values less than 128)
647 		UTFString& append( size_type num, char ch );
648 		//! appends \a num repetitions of \a ch on to the end of the current string (Full Unicode spectrum)
649 		UTFString& append( size_type num, unicode_char ch );
650 		//@}
651 
652 		//////////////////////////////////////////////////////////////////////////
653 
654 		//!\name insert
655 		//@{
656 		//! inserts \a ch before the code point denoted by \a i; returns an iterator
657 		iterator insert( iterator i, const code_point& ch );
658 		//! inserts the UTFString \a str into the current string, at location \a index
659 		UTFString& insert( size_type index, const UTFString& str );
660 		//! inserts \a str into the current string, at location \a index
		/*! Forwards directly to mData.insert( index, str ) and returns *this.
		\a str is passed as a bare pointer with no length, so it is presumably
		expected to be nul-terminated -- confirm against the type of mData, which is
		declared outside this part of the header. */
661 		UTFString& insert( size_type index, const code_point* str ) {
662 			mData.insert( index, str );
663 			return *this;
664 		}
665 		//! inserts a substring of \a str (starting at \a index2 and \a num code points long) into the current string, at location \a index1
666 		UTFString& insert( size_type index1, const UTFString& str, size_type index2, size_type num );
667 		//! inserts the code points denoted by \a start and \a end into the current string, before the code point specified by \a i
668 		void insert( iterator i, iterator start, iterator end );
669 		//! inserts \a num code points of \a str into the current string, at location \a index
670 		UTFString& insert( size_type index, const code_point* str, size_type num );
671 #if OGRE_IS_NATIVE_WCHAR_T
672 		//! inserts \a num code points of \a w_str into the current string, at location \a index
673 		UTFString& insert( size_type index, const wchar_t* w_str, size_type num );
674 #endif
675 		//! inserts \a num code points of \a c_str into the current string, at location \a index
676 		UTFString& insert( size_type index, const char* c_str, size_type num );
677 		//! inserts \a num copies of \a ch into the current string, at location \a index
678 		UTFString& insert( size_type index, size_type num, code_point ch );
679 #if OGRE_IS_NATIVE_WCHAR_T
680 		//! inserts \a num copies of \a ch into the current string, at location \a index
681 		UTFString& insert( size_type index, size_type num, wchar_t ch );
682 #endif
683 		//! inserts \a num copies of \a ch into the current string, at location \a index
684 		UTFString& insert( size_type index, size_type num, char ch );
685 		//! inserts \a num copies of \a ch into the current string, at location \a index
686 		UTFString& insert( size_type index, size_type num, unicode_char ch );
		// The iterator-positioned overloads below return void, unlike the
		// index-positioned overloads above, which return UTFString&.
687 		//! inserts \a num copies of \a ch into the current string, before the code point denoted by \a i
688 		void insert( iterator i, size_type num, const code_point& ch );
689 #if OGRE_IS_NATIVE_WCHAR_T
690 		//! inserts \a num copies of \a ch into the current string, before the code point denoted by \a i
691 		void insert( iterator i, size_type num, const wchar_t& ch );
692 #endif
693 		//! inserts \a num copies of \a ch into the current string, before the code point denoted by \a i
694 		void insert( iterator i, size_type num, const char& ch );
695 		//! inserts \a num copies of \a ch into the current string, before the code point denoted by \a i
696 		void insert( iterator i, size_type num, const unicode_char& ch );
697 		//@}
698 
699 		//////////////////////////////////////////////////////////////////////////
700 
701 		//!\name erase
702 		//@{
703 		//! removes the code point pointed to by \a loc, returning an iterator to the next character
704 		iterator erase( iterator loc );
705 		//! removes the code points between \a start and \a end (including the one at \a start but not the one at \a end), returning an iterator to the code point after the last code point removed
706 		iterator erase( iterator start, iterator end );
707 		//! removes \a num code points from the current string, starting at \a index; both arguments are optional (\a index defaults to 0 and \a num to \c npos)
708 		UTFString& erase( size_type index = 0, size_type num = npos );
709 		//@}
710 
711 		//////////////////////////////////////////////////////////////////////////
712 
713 		//!\name replace
714 		//@{
715 		//! replaces up to \a num1 code points of the current string (starting at \a index1) with \a str
716 		UTFString& replace( size_type index1, size_type num1, const UTFString& str );
717 		//! replaces up to \a num1 code points of the current string (starting at \a index1) with up to \a num2 code points from \a str
718 		UTFString& replace( size_type index1, size_type num1, const UTFString& str, size_type num2 );
719 		//! replaces up to \a num1 code points of the current string (starting at \a index1) with up to \a num2 code points from \a str beginning at \a index2
720 		UTFString& replace( size_type index1, size_type num1, const UTFString& str, size_type index2, size_type num2 );
721 		//! replaces code points in the current string from \a start to \a end with \a num code points from \a str (\a num defaults to \c npos)
722 		UTFString& replace( iterator start, iterator end, const UTFString& str, size_type num = npos );
723 		//! replaces up to \a num1 code points in the current string (beginning at \a index) with \a num2 copies of \a ch
724 		UTFString& replace( size_type index, size_type num1, size_type num2, code_point ch );
725 		//! replaces the code points in the current string from \a start to \a end with \a num copies of \a ch
726 		UTFString& replace( iterator start, iterator end, size_type num, code_point ch );
727 		//@}
728 
729 		//////////////////////////////////////////////////////////////////////////
730 
731 		//!\name compare
732 		//@{
733 		//! compare \a str to the current string (NOTE(review): the int result presumably follows the std::basic_string::compare convention of negative / zero / positive - confirm in the implementation)
734 		int compare( const UTFString& str ) const;
735 		//! compare the code point string \a str to the current string
736 		int compare( const code_point* str ) const;
737 		//! compare \a str to a substring of the current string, starting at \a index for \a length characters
738 		int compare( size_type index, size_type length, const UTFString& str ) const;
739 		//! compare a substring of \a str to a substring of the current string, where \a index2 and \a length2 refer to \a str and \a index and \a length refer to the current string
740 		int compare( size_type index, size_type length, const UTFString& str, size_type index2, size_type length2 ) const;
741 		//! compare a substring of \a str to a substring of the current string, where the substring of \a str begins at zero and is \a length2 characters long, and the substring of the current string begins at \a index and is \a length characters long
742 		int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const;
743 #if OGRE_IS_NATIVE_WCHAR_T
744 		//! compare a substring of \a w_str to a substring of the current string, where the substring of \a w_str begins at zero and is \a length2 elements long, and the substring of the current string begins at \a index and is \a length characters long
745 		int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const;
746 #endif
747 		//! compare a substring of \a c_str to a substring of the current string, where the substring of \a c_str begins at zero and is \a length2 <b>UTF-8 code points</b> long, and the substring of the current string begins at \a index and is \a length characters long
748 		int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const;
749 		//@}
750 
751 		//////////////////////////////////////////////////////////////////////////
752 
753 		//!\name find & rfind
754 		//@{
755 		//! returns the index of the first occurrence of \a str within the current string, starting at \a index; returns \c UTFString::npos if nothing is found
756 		/*! \a str is a UTF-16 encoded string, but through implicit casting can also be a UTF-8 encoded string (const char* or std::string) */
757 		size_type find( const UTFString& str, size_type index = 0 ) const;
758 		//! returns the index of the first occurrence of \a cp_str within the current string and within \a length code points, starting at \a index; returns \c UTFString::npos if nothing is found
759 		/*! \a cp_str is a UTF-16 encoded string */
760 		size_type find( const code_point* cp_str, size_type index, size_type length ) const;
761 		//! returns the index of the first occurrence of \a c_str within the current string and within \a length code points, starting at \a index; returns \c UTFString::npos if nothing is found
762 		/*! \a c_str is a UTF-8 encoded string */
763 		size_type find( const char* c_str, size_type index, size_type length ) const;
764 #if OGRE_IS_NATIVE_WCHAR_T
765 		//! returns the index of the first occurrence of \a w_str within the current string and within \a length code points, starting at \a index; returns \c UTFString::npos if nothing is found
766 		/*! \a w_str is a UTF-16 encoded string */
767 		size_type find( const wchar_t* w_str, size_type index, size_type length ) const;
768 #endif
769 		//! returns the index of the first occurrence of \a ch within the current string, starting at \a index; returns \c UTFString::npos if nothing is found
770 		/*! \a ch is only capable of representing Unicode values up to U+007F (127) */
771 		size_type find( char ch, size_type index = 0 ) const;
772 		//! returns the index of the first occurrence of \a ch within the current string, starting at \a index; returns \c UTFString::npos if nothing is found
773 		/*! \a ch is only capable of representing Unicode values up to U+FFFF (65535) */
774 		size_type find( code_point ch, size_type index = 0 ) const;
775 #if OGRE_IS_NATIVE_WCHAR_T
776 		//! returns the index of the first occurrence of \a ch within the current string, starting at \a index; returns \c UTFString::npos if nothing is found
777 		/*! \a ch is only capable of representing Unicode values up to U+FFFF (65535) */
778 		size_type find( wchar_t ch, size_type index = 0 ) const;
779 #endif
780 		//! returns the index of the first occurrence of \a ch within the current string, starting at \a index; returns \c UTFString::npos if nothing is found
781 		/*! \a ch can fully represent any Unicode character */
782 		size_type find( unicode_char ch, size_type index = 0 ) const;
783 
784 		//! returns the location of the first occurrence of \a str in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found (NOTE(review): \a index defaults to 0 here, whereas std::basic_string::rfind defaults to npos; if the value is forwarded unchanged, omitting \a index would only examine the start of the string - confirm in the implementation)
785 		size_type rfind( const UTFString& str, size_type index = 0 ) const;
786 		//! returns the location of the first occurrence of \a cp_str in the current string, doing a reverse search from \a index, searching at most \a num characters; returns \c UTFString::npos if nothing is found
787 		size_type rfind( const code_point* cp_str, size_type index, size_type num ) const;
788 		//! returns the location of the first occurrence of \a c_str in the current string, doing a reverse search from \a index, searching at most \a num characters; returns \c UTFString::npos if nothing is found
789 		size_type rfind( const char* c_str, size_type index, size_type num ) const;
790 #if OGRE_IS_NATIVE_WCHAR_T
791 		//! returns the location of the first occurrence of \a w_str in the current string, doing a reverse search from \a index, searching at most \a num characters; returns \c UTFString::npos if nothing is found
792 		size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const;
793 #endif
794 		//! returns the location of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found (NOTE(review): \a index defaults to 0, not npos - pass the starting position explicitly unless the implementation confirms otherwise)
795 		size_type rfind( char ch, size_type index = 0 ) const;
796 		//! returns the location of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found (unlike the sibling overloads, \a index has no default value here)
797 		size_type rfind( code_point ch, size_type index ) const;
798 #if OGRE_IS_NATIVE_WCHAR_T
799 		//! returns the location of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found (NOTE(review): \a index defaults to 0, not npos - pass the starting position explicitly unless the implementation confirms otherwise)
800 		size_type rfind( wchar_t ch, size_type index = 0 ) const;
801 #endif
802 		//! returns the location of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found (NOTE(review): \a index defaults to 0, not npos - pass the starting position explicitly unless the implementation confirms otherwise)
803 		size_type rfind( unicode_char ch, size_type index = 0 ) const;
804 		//@}
805 
806 		//////////////////////////////////////////////////////////////////////////
807 
808 		//!\name find_first/last_(not)_of
809 		//@{
810 		//! returns the index of the first character within the current string that matches \b any character in \a str, beginning the search at \a index (default 0) and searching at most \a num characters (default \c npos); returns \c UTFString::npos if nothing is found
811 		size_type find_first_of( const UTFString &str, size_type index = 0, size_type num = npos ) const;
812 		//! returns the index of the first occurrence of the code point \a ch in the current string, starting the search at \a index; returns \c UTFString::npos if nothing is found
813 		size_type find_first_of( code_point ch, size_type index = 0 ) const;
814 		//! returns the index of the first occurrence of the char \a ch in the current string, starting the search at \a index; returns \c UTFString::npos if nothing is found
815 		size_type find_first_of( char ch, size_type index = 0 ) const;
816 #if OGRE_IS_NATIVE_WCHAR_T
817 		//! returns the index of the first occurrence of the wchar_t \a ch in the current string, starting the search at \a index; returns \c UTFString::npos if nothing is found
818 		size_type find_first_of( wchar_t ch, size_type index = 0 ) const;
819 #endif
820 		//! returns the index of the first occurrence of the Unicode character \a ch in the current string, starting the search at \a index; returns \c UTFString::npos if nothing is found
821 		size_type find_first_of( unicode_char ch, size_type index = 0 ) const;
822 
823 		//! returns the index of the first character within the current string that does not match any character in \a str, beginning the search at \a index (default 0) and searching at most \a num characters (default \c npos); returns \c UTFString::npos if nothing is found
824 		size_type find_first_not_of( const UTFString& str, size_type index = 0, size_type num = npos ) const;
825 		//! returns the index of the first character within the current string that does not match the code point \a ch, starting the search at \a index; returns \c UTFString::npos if nothing is found
826 		size_type find_first_not_of( code_point ch, size_type index = 0 ) const;
827 		//! returns the index of the first character within the current string that does not match the char \a ch, starting the search at \a index; returns \c UTFString::npos if nothing is found
828 		size_type find_first_not_of( char ch, size_type index = 0 ) const;
829 #if OGRE_IS_NATIVE_WCHAR_T
830 		//! returns the index of the first character within the current string that does not match the wchar_t \a ch, starting the search at \a index; returns \c UTFString::npos if nothing is found
831 		size_type find_first_not_of( wchar_t ch, size_type index = 0 ) const;
832 #endif
833 		//! returns the index of the first character within the current string that does not match the Unicode character \a ch, starting the search at \a index; returns \c UTFString::npos if nothing is found
834 		size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const;
835 
836 		//! returns the index of the first character within the current string that matches any character in \a str, doing a reverse search from \a index (default \c npos) and searching at most \a num characters (default \c npos); returns \c UTFString::npos if nothing is found
837 		size_type find_last_of( const UTFString& str, size_type index = npos, size_type num = npos ) const;
838 		//! returns the index of the first occurrence of the code point \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found
839 		size_type find_last_of( code_point ch, size_type index = npos ) const;
840 		//! returns the index of the first occurrence of the char \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found
841 		size_type find_last_of( char ch, size_type index = npos ) const {
842 			return find_last_of( static_cast<code_point>( ch ), index ); // converts ch to a code_point and defers to the code_point overload (NOTE(review): presumably only meaningful for values below 128, like the other char overloads - confirm)
843 		}
844 #if OGRE_IS_NATIVE_WCHAR_T
845 		//! returns the index of the first occurrence of the wchar_t \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found
846 		size_type find_last_of( wchar_t ch, size_type index = npos ) const;
847 #endif
848 		//! returns the index of the first occurrence of the Unicode character \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found
849 		size_type find_last_of( unicode_char ch, size_type index = npos ) const;
850 
851 		//! returns the index of the last character within the current string that does not match any character in \a str, doing a reverse search from \a index (default \c npos) and searching at most \a num characters (default \c npos); returns \c UTFString::npos if nothing is found
852 		size_type find_last_not_of( const UTFString& str, size_type index = npos, size_type num = npos ) const;
853 		//! returns the index of the last occurrence of a character that does not match the code point \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found
854 		size_type find_last_not_of( code_point ch, size_type index = npos ) const;
855 		//! returns the index of the last occurrence of a character that does not match the char \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found
856 		size_type find_last_not_of( char ch, size_type index = npos ) const;
857 #if OGRE_IS_NATIVE_WCHAR_T
858 		//! returns the index of the last occurrence of a character that does not match the wchar_t \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found
859 		size_type find_last_not_of( wchar_t ch, size_type index = npos ) const;
860 #endif
861 		//! returns the index of the last occurrence of a character that does not match the Unicode character \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found
862 		size_type find_last_not_of( unicode_char ch, size_type index = npos ) const;
863 		//@}
864 
865 		//////////////////////////////////////////////////////////////////////////
866 
867 		//!\name Operators
868 		//@{
869 		//! less than operator: compares the current string (left operand) with \a right
870 		bool operator<( const UTFString& right ) const;
871 		//! less than or equal operator: compares the current string (left operand) with \a right
872 		bool operator<=( const UTFString& right ) const;
873 		//! greater than operator: compares the current string (left operand) with \a right
874 		bool operator>( const UTFString& right ) const;
875 		//! greater than or equal operator: compares the current string (left operand) with \a right
876 		bool operator>=( const UTFString& right ) const;
877 		//! equality operator: compares the current string (left operand) with \a right
878 		bool operator==( const UTFString& right ) const;
879 		//! inequality operator: compares the current string (left operand) with \a right
880 		bool operator!=( const UTFString& right ) const;
881 		//! assignment from another UTFString \a s; other compatible types reach this overload by implicit conversion to UTFString
882 		UTFString& operator=( const UTFString& s );
883 		//! assignment from a single code point \a ch
884 		UTFString& operator=( code_point ch );
885 		//! assignment from a single char \a ch
886 		UTFString& operator=( char ch );
887 #if OGRE_IS_NATIVE_WCHAR_T
888 		//! assignment from a single wchar_t \a ch
889 		UTFString& operator=( wchar_t ch );
890 #endif
891 		//! assignment from a single Unicode character \a ch
892 		UTFString& operator=( unicode_char ch );
893 		//! code point dereference operator: returns a modifiable reference to the code point at \a index (NOTE(review): whether \a index is bounds-checked is not visible here)
894 		code_point& operator[]( size_type index );
895 		//! code point dereference operator: returns a read-only reference to the code point at \a index (NOTE(review): whether \a index is bounds-checked is not visible here)
896 		const code_point& operator[]( size_type index ) const;
897 		//@}
898 
899 		//////////////////////////////////////////////////////////////////////////
900 
901 		//!\name Implicit Cast Operators
902 		//@{
903 		//! implicit cast to std::string (NOTE(review): presumably yields the UTF-8 form, as asUTF8() does - confirm in the implementation)
904 		operator std::string() const;
905 		//! implicit cast to std::wstring (NOTE(review): presumably yields the same content as asWStr() - confirm in the implementation)
906 		operator std::wstring() const;
907 #if OGRE_STRING_USE_CUSTOM_MEMORY_ALLOCATOR
908 		//! implicit cast to Ogre::String; only declared when OGRE_STRING_USE_CUSTOM_MEMORY_ALLOCATOR is enabled
909 		operator Ogre::String() const;
910 #endif
911 		//@}
912 
913 		//////////////////////////////////////////////////////////////////////////
914 
915 		//!\name UTF-16 character encoding/decoding
916 		//@{
917 		//! returns \c true if \a cp does not match the signature for the lead or follow code point of a surrogate pair in a UTF-16 sequence
918 		static bool _utf16_independent_char( code_point cp );
919 		//! returns \c true if \a cp matches the signature of a surrogate pair lead character
920 		static bool _utf16_surrogate_lead( code_point cp );
921 		//! returns \c true if \a cp matches the signature of a surrogate pair following character
922 		static bool _utf16_surrogate_follow( code_point cp );
923 		//! estimates the number of UTF-16 code points in the sequence starting with \a cp
924 		static size_t _utf16_char_length( code_point cp );
925 		//! returns the number of UTF-16 code points needed to represent the given UTF-32 character \a uc
926 		static size_t _utf16_char_length( unicode_char uc );
927 		//! converts the given UTF-16 character buffer \a in_cp to a single UTF-32 Unicode character \a out_uc, returns the number of code points used to create the output character (2 for surrogate pairs, otherwise 1)
928 		/*! This function does its best to prevent error conditions, verifying complete
929 		surrogate pairs before applying the algorithm. In the event that half of a pair
930 		is found it will happily generate a value in the 0xD800 - 0xDFFF range, which is
931 		normally an invalid Unicode value but we preserve them for use as sentinel values. */
932 		static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc );
933 		//! writes the given UTF-32 \a in_uc to the buffer location \a out_cp using UTF-16 encoding, returns the number of code points used to encode the input (always 1 or 2)
934 		/*! This function, like its counterpart, will happily create invalid UTF-16 surrogate pairs. These
935 		invalid entries will be created for any value of \a in_uc that falls in the range U+D800 - U+DFFF.
936 		These are generally useful as sentinel values to represent various program specific conditions.
937 		@note This function will also pass through any single UTF-16 code point without modification,
938 		making it a safe method of ensuring a stream that is unknown UTF-32 or UTF-16 is truly UTF-16.*/
939 		static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] );
940 		//@}
941 
942 		//////////////////////////////////////////////////////////////////////////
943 
944 		//!\name UTF-8 character encoding/decoding
945 		//@{
946 		//! returns \c true if \a cp is the beginning of a UTF-8 sequence
947 		static bool _utf8_start_char( unsigned char cp );
948 		//! estimates the number of UTF-8 code points in the sequence starting with \a cp
949 		static size_t _utf8_char_length( unsigned char cp );
950 		//! returns the number of UTF-8 code points needed to represent the given UTF-32 character \a uc
951 		static size_t _utf8_char_length( unicode_char uc );
952 
953 		//! converts the given UTF-8 character buffer \a in_cp to a single UTF-32 Unicode character \a out_uc, returns the number of bytes used to create the output character (maximum of 6)
954 		static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc );
955 		//! writes the given UTF-32 \a in_uc to the buffer location \a out_cp using UTF-8 encoding, returns the number of bytes used to encode the input
956 		static size_t _utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] );
957 
958 		//! verifies the UTF-8 stream \a c_str, returning the total number of Unicode characters found (NOTE(review): how an invalid stream is reported is not visible here - confirm in the implementation)
959 		static size_type _verifyUTF8( const unsigned char* c_str );
960 		//! verifies the UTF-8 stream held in \a str, returning the total number of Unicode characters found (NOTE(review): how an invalid stream is reported is not visible here - confirm in the implementation)
961 		static size_type _verifyUTF8( const std::string& str );
962 		//@}
963 
964 	private:
965 		//template<class ITER_TYPE> friend class _iterator;
966 		dstring mData; // the string's stored data
967 
968 		//! buffer data type identifier: records which kind of string, if any, the scratch buffer currently holds
969 		enum BufferType {
970 			bt_none,
971 			bt_string,
972 			bt_wstring,
973 			bt_utf32string
974 		};
975 
976 		//! common constructor operations
977 		void _init();
978 
979 		///////////////////////////////////////////////////////////////////////
980 		// Scratch buffer
981 		//! auto cleans the scratch buffer using the proper delete for the stored type
982 		void _cleanBuffer() const;
983 
984 		//! create a std::string in the scratch buffer area
985 		void _getBufferStr() const;
986 		//! create a std::wstring in the scratch buffer area
987 		void _getBufferWStr() const;
988 		//! create a utf32string in the scratch buffer area
989 		void _getBufferUTF32Str() const;
990 
991 		void _load_buffer_UTF8() const; // NOTE(review): presumably fills the scratch buffer with the UTF-8 form of mData - confirm in the implementation
992 		void _load_buffer_WStr() const; // NOTE(review): presumably fills the scratch buffer with the std::wstring form of mData - confirm in the implementation
993 		void _load_buffer_UTF32() const; // NOTE(review): presumably fills the scratch buffer with the UTF-32 form of mData - confirm in the implementation
994 
995 		mutable BufferType mBufferType; // identifies the data type held in mBuffer
996 		mutable size_t mBufferSize; // size of the CString buffer
997 
998 		// multi-purpose buffer used everywhere we need a throw-away buffer; a single pointer exposed under each supported string type (mBufferType tells which one applies)
999 		union {
1000 			mutable void* mVoidBuffer;
1001 			mutable std::string* mStrBuffer;
1002 			mutable std::wstring* mWStrBuffer;
1003 			mutable utf32string* mUTF32StrBuffer;
1004 		}
1005 		mBuffer;
1006 	};
1007 
1008 	//! string addition operator: returns a copy of \a s1 with \a s2 appended \relates UTFString
1009 	inline UTFString operator+( const UTFString& s1, const UTFString& s2 ) {
1010 		return UTFString( s1 ).append( s2 );
1011 	}
1012 	//! string addition operator: returns a copy of \a s1 with the code point \a c appended once \relates UTFString
1013 	inline UTFString operator+( const UTFString& s1, UTFString::code_point c ) {
1014 		return UTFString( s1 ).append( 1, c );
1015 	}
1016 	//! string addition operator: returns a copy of \a s1 with the Unicode character \a c appended once \relates UTFString
1017 	inline UTFString operator+( const UTFString& s1, UTFString::unicode_char c ) {
1018 		return UTFString( s1 ).append( 1, c );
1019 	}
1020 	//! string addition operator: returns a copy of \a s1 with the char \a c appended once \relates UTFString
1021 	inline UTFString operator+( const UTFString& s1, char c ) {
1022 		return UTFString( s1 ).append( 1, c );
1023 	}
1024 #if OGRE_IS_NATIVE_WCHAR_T
1025 	//! string addition operator: returns a copy of \a s1 with the wchar_t \a c appended once \relates UTFString
1026 	inline UTFString operator+( const UTFString& s1, wchar_t c ) {
1027 		return UTFString( s1 ).append( 1, c );
1028 	}
1029 #endif
1030 	//! string addition operator: returns a new string made of the code point \a c followed by \a s2 \relates UTFString
1031 	inline UTFString operator+( UTFString::code_point c, const UTFString& s2 ) {
1032 		return UTFString().append( 1, c ).append( s2 );
1033 	}
1034 	//! string addition operator: returns a new string made of the Unicode character \a c followed by \a s2 \relates UTFString
1035 	inline UTFString operator+( UTFString::unicode_char c, const UTFString& s2 ) {
1036 		return UTFString().append( 1, c ).append( s2 );
1037 	}
1038 	//! string addition operator: returns a new string made of the char \a c followed by \a s2 \relates UTFString
1039 	inline UTFString operator+( char c, const UTFString& s2 ) {
1040 		return UTFString().append( 1, c ).append( s2 );
1041 	}
1042 #if OGRE_IS_NATIVE_WCHAR_T
1043 	//! string addition operator: returns a new string made of the wchar_t \a c followed by \a s2 \relates UTFString
1044 	inline UTFString operator+( wchar_t c, const UTFString& s2 ) {
1045 		return UTFString().append( 1, c ).append( s2 );
1046 	}
1047 #endif
1048 
1049 	// (const) forward iterator common operators: each one applies the same operator to the mIter members of its two operands
1050 	inline UTFString::size_type operator-( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
1051 		return ( left.mIter - right.mIter ); // the difference is returned as size_type (NOTE(review): if size_type is unsigned, left must not precede right - confirm)
1052 	}
1053 	inline bool operator==( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
1054 		return left.mIter == right.mIter;
1055 	}
1056 	inline bool operator!=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
1057 		return left.mIter != right.mIter;
1058 	}
1059 	inline bool operator<( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
1060 		return left.mIter < right.mIter;
1061 	}
1062 	inline bool operator<=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
1063 		return left.mIter <= right.mIter;
1064 	}
1065 	inline bool operator>( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
1066 		return left.mIter > right.mIter;
1067 	}
1068 	inline bool operator>=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
1069 		return left.mIter >= right.mIter;
1070 	}
1071 
1072 	// (const) reverse iterator common operators: defined in terms of the mIter members of the two operands
1073 	// NB: subtraction and the ordering comparisons are evaluated with the operands swapped because this is a reverse iterator wrapping a forward iterator; equality and inequality need no swap
1074 	inline UTFString::size_type operator-( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
1075 		return ( right.mIter - left.mIter ); // operands swapped relative to the forward iterator version
1076 	}
1077 	inline bool operator==( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
1078 		return left.mIter == right.mIter;
1079 	}
1080 	inline bool operator!=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
1081 		return left.mIter != right.mIter;
1082 	}
1083 	inline bool operator<( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
1084 		return right.mIter < left.mIter;
1085 	}
1086 	inline bool operator<=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
1087 		return right.mIter <= left.mIter;
1088 	}
1089 	inline bool operator>( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
1090 		return right.mIter > left.mIter;
1091 	}
1092 	inline bool operator>=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
1093 		return right.mIter >= left.mIter;
1094 	}
1095 
1096 	//! std::ostream write operator: writes the result of \a s.asUTF8() to \a os and returns \a os \relates UTFString
1097 	inline std::ostream& operator << ( std::ostream& os, const UTFString& s ) {
1098 		return os << s.asUTF8();
1099 	}
1100 
1101 	//! std::wostream write operator: writes the result of \a s.asWStr() to \a os and returns \a os \relates UTFString
1102 	inline std::wostream& operator << ( std::wostream& os, const UTFString& s ) {
1103 		return os << s.asWStr();
1104 	}
1105 
1106 	/** @} */
1107 	/** @} */
1108 
1109 
1110 } // namespace Ogre{
1111 
1112 #endif // OGRE_UNICODE_SUPPORT
1113 
1114 #include "OgreHeaderSuffix.h"
1115 
1116 #endif
1117