1 // Modified from OpenGUI under lenient license 2 // Original copyright details and licensing below: 3 // OpenGUI (http://opengui.sourceforge.net) 4 // This source code is released under the BSD License 5 6 // Permission is given to the Ogre project to use the contents of file within its 7 // source and binary applications, as well as any derivative works, in accordance 8 // with the terms of any license under which Ogre is or will be distributed. 9 // 10 // Ogre may relicense its copy of this file, as well as any OpenGUI released updates 11 // to this file, under any terms that it deems fit, and is not required to maintain 12 // the original BSD licensing terms of this file, however OpenGUI retains the right 13 // to present its copy of this file under the terms of any license under which 14 // OpenGUI is distributed. 15 // 16 // Ogre is not required to release to OpenGUI any future changes that it makes to 17 // this file, and understands and agrees that any such changes that are released 18 // back to OpenGUI will become available under the terms of any license under which 19 // OpenGUI is distributed. 20 // 21 // For brevity, this permission text may be removed from this file if desired. 22 // The original record kept within the SourceForge (http://sourceforge.net/) tracker 23 // is sufficient. 
//
// - Eric Shorkey (zero/zeroskill) <opengui@rightbracket.com> [January 20th, 2007]

#ifndef __OGRE_UTFSTRING_H__
#define __OGRE_UTFSTRING_H__


#include "OgrePrerequisites.h"
#include "OgreHeaderPrefix.h"

#if OGRE_UNICODE_SUPPORT

// these are explained later
#include <iterator>
#include <string>
#include <stdexcept>

// Workaround for VC7/7.1/8.0/9.0 (2003 - 2008):
// when built with /MD or /MDd, VC has both std::basic_string<unsigned short> and
// basic_string<__wchar_t> instantiated in msvcprt[d].lib/MSVCP71[D].dll, but the header
// files tell the compiler that only one of them exists (based on the /Zc:wchar_t compile
// option). Since this file uses both of them, the compiler instantiates another one in
// user object code, which leads to duplicate symbols with msvcprt.lib/MSVCP71[D].dll.
//
#if OGRE_COMPILER == OGRE_COMPILER_MSVC && (OGRE_COMP_VER >= 1300 && OGRE_COMP_VER < 1600)

# if defined(_DLL_CPPLIB)

namespace std
{
    template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>,
        allocator<unsigned short> >;

    template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>,
        allocator<__wchar_t> >;
}

# endif // defined(_DLL_CPPLIB)

#endif // OGRE_COMPILER == OGRE_COMPILER_MSVC && (OGRE_COMP_VER >= 1300 && OGRE_COMP_VER < 1600)


namespace Ogre {
    /** \addtogroup Core
    *  @{
    */
    /** \addtogroup Overlays
    *  @{
    */

    /* READ THIS NOTICE BEFORE USING IN YOUR OWN APPLICATIONS
    =NOTICE=
    This class is not a complete Unicode solution. It purposefully does not
    provide certain functionality, such as proper lexical sorting for
    Unicode values. It does provide comparison operators for the sole purpose
    of using UTFString as an index with std::map and other operator< sorted
    containers, but it should NOT be relied upon for meaningful lexical
    operations, such as alphabetical sorts. If you need this type of
    functionality, look into using ICU instead (http://icu.sourceforge.net/).

    =REQUIREMENTS=
    There are a few requirements for proper operation. They are fairly small,
    and shouldn't restrict usage on any reasonable target.
    * Compiler must support unsigned 16-bit integer types
    * Compiler must support signed 32-bit integer types
    * wchar_t must be either UTF-16 or UTF-32 encoding, and specified as such
    using the WCHAR_UTF16 macro as outlined below.
    * You must include <iterator>, <string>, and <cwchar>. Probably more, but
    these are the most obvious.

    =REQUIRED PREPROCESSOR MACROS=
    This class requires two preprocessor macros to be defined in order to
    work as advertised.
    INT32 - must be mapped to a signed 32 bit integer (ex. #define INT32 int)
    UINT16 - must be mapped to an unsigned 16 bit integer (ex. #define UINT16 unsigned short)
    NOTE(review): the typedefs inside UTFString are written in terms of uint16 / uint32
    rather than these macros -- confirm whether this section is still current.

    Additionally, a third macro should be defined to control the evaluation of wchar_t:
    WCHAR_UTF16 - should be defined when wchar_t represents UTF-16 code points,
    such as in Windows. Otherwise it is assumed that wchar_t is a 32-bit
    integer representing UTF-32 code points.
    */

    // THIS IS A VERY BRIEF AUTO DETECTION.
YOU MAY NEED TO TWEAK THIS 107 #ifdef __STDC_ISO_10646__ 108 // for any compiler that provides this, wchar_t is guaranteed to hold any Unicode value with a single code point (32-bit or larger) 109 // so we can safely skip the rest of the testing 110 #else // #ifdef __STDC_ISO_10646__ 111 #if defined( __WIN32__ ) || defined( _WIN32 ) || !defined(ANDROID) 112 #define WCHAR_UTF16 // All currently known Windows platforms utilize UTF-16 encoding in wchar_t 113 #else // #if defined( __WIN32__ ) || defined( _WIN32 ) 114 #if OGRE_COMPILER != OGRE_COMPILER_GCCE 115 #if WCHAR_MAX <= 0xFFFF // this is a last resort fall back test; WCHAR_MAX is defined in <wchar.h> 116 #define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit 117 #endif // #if WCHAR_MAX <= 0xFFFF 118 #endif 119 #endif // #if defined( __WIN32__ ) || defined( _WIN32 ) 120 #endif // #ifdef __STDC_ISO_10646__ 121 122 123 // OGRE_IS_NATIVE_WCHAR_T means that wchar_t isn't a typedef of 124 // uint16 or uint32. 125 #if OGRE_COMPILER == OGRE_COMPILER_MSVC 126 127 // Don't define wchar_t related functions since it'll duplicate 128 // with UTFString::code_point related functions when compile 129 // without /Zc:wchar_t, because in this case both of them are 130 // a typedef of uint16. 131 # if defined(_NATIVE_WCHAR_T_DEFINED) 132 # define OGRE_IS_NATIVE_WCHAR_T 1 133 # else 134 # define OGRE_IS_NATIVE_WCHAR_T 0 135 # endif 136 #else // OGRE_COMPILER != OGRE_COMPILER_MSVC 137 138 // Assumed wchar_t is natively for other compilers 139 # define OGRE_IS_NATIVE_WCHAR_T 1 140 141 #endif // OGRE_COMPILER == OGRE_COMPILER_MSVC 142 143 //! A UTF-16 string with implicit conversion to/from std::string and std::wstring 144 /*! This class provides a complete 1 to 1 map of most std::string functions (at least to my 145 knowledge). 
Implicit conversions allow this string class to work with all common C++ string
    formats, with specialty functions defined where implicit conversion would cause potential
    problems or is otherwise unavailable.

    Some additional functionality is present to assist in working with characters using the
    32-bit UTF-32 encoding. (Which is guaranteed to fit any Unicode character into a single
    code point.) \b Note: Reverse iterators do not have this functionality due to the
    ambiguity that surrounds working with UTF-16 in reverse. (Such as, where should an
    iterator point to represent the beginning of a surrogate pair?)


    \par Supported Input Types
    The supported string types for input, and their assumed encoding schemes, are:
    - std::string (UTF-8)
    - char* (UTF-8)
    - std::wstring (autodetected UTF-16 / UTF-32 based on compiler)
    - wchar_t* (autodetected UTF-16 / UTF-32 based on compiler)


    \see
    - For additional information on UTF-16 encoding: http://en.wikipedia.org/wiki/UTF-16
    - For additional information on UTF-8 encoding: http://en.wikipedia.org/wiki/UTF-8
    - For additional information on UTF-32 encoding: http://en.wikipedia.org/wiki/UTF-32
    */
    class _OgreExport UTFString {
        // constants used in UTF-8 conversions
        // Each _leadN value is a lead-byte bit pattern and _leadN_mask selects the bits marked
        // 'x' in the pattern shown next to it; _cont / _cont_mask are the same pair for a
        // continuation byte.
        static const unsigned char _lead1 = 0xC0;      //110xxxxx
        static const unsigned char _lead1_mask = 0x1F; //00011111
        static const unsigned char _lead2 = 0xE0;      //1110xxxx
        static const unsigned char _lead2_mask = 0x0F; //00001111
        static const unsigned char _lead3 = 0xF0;      //11110xxx
        static const unsigned char _lead3_mask = 0x07; //00000111
        static const unsigned char _lead4 = 0xF8;      //111110xx
        static const unsigned char _lead4_mask = 0x03; //00000011
        static const unsigned char _lead5 = 0xFC;      //1111110x
        static const unsigned char _lead5_mask = 0x01; //00000001
        static const unsigned char _cont = 0x80;       //10xxxxxx
        static const unsigned char _cont_mask = 0x3F;  //00111111

    public:
        //! size type used to indicate string size and character positions within the string
        typedef size_t size_type;
        //! the usual constant representing: not found, no limit, etc (all bits of size_type set)
        static const size_type npos = static_cast<size_type>(~0);

        //! a single 32-bit Unicode character
        typedef uint32 unicode_char;

        //! a single UTF-16 code point
        typedef uint16 code_point;

        //! value type typedef for use in iterators
        typedef code_point value_type;

        //! string type used for the underlying UTF-16 data
        typedef std::basic_string<code_point> dstring; // data string

        //! string type used for returning UTF-32 formatted data
        typedef std::basic_string<unicode_char> utf32string;

        //! This exception is used when invalid data streams are encountered
        class _OgreExport invalid_data: public std::runtime_error { /* i don't know why the beautifier is freaking out on this line */
        public:
            //! constructor takes a string message that can be later retrieved by the what() function
            explicit invalid_data( const std::string& _Message ): std::runtime_error( _Message ) {
                /* The thing is, Bob, it's not that I'm lazy, it's that I just don't care. */
            }
        };

        //#########################################################################
        //!
//! base iterator class for UTFString
        /*! Holds the state shared by all four iterator flavours: an iterator into the
        underlying data string (mIter) and a pointer to a UTFString (mString). */
        class _OgreExport _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type> { /* i don't know why the beautifier is freaking out on this line */
            friend class UTFString;
        protected:
            _base_iterator();

            void _seekFwd( size_type c );
            void _seekRev( size_type c );
            void _become( const _base_iterator& i );
            bool _test_begin() const;
            bool _test_end() const;
            size_type _get_index() const;
            void _jump_to( size_type index );

            unicode_char _getCharacter() const;
            int _setCharacter( unicode_char uc );

            void _moveNext();
            void _movePrev();

            dstring::iterator mIter;
            UTFString* mString;
        };

        //#########################################################################
        // FORWARD ITERATORS
        //#########################################################################
        class _const_fwd_iterator; // forward declaration

        //! forward iterator for UTFString
        class _OgreExport _fwd_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
            friend class _const_fwd_iterator;
        public:
            _fwd_iterator();
            _fwd_iterator( const _fwd_iterator& i );

            //! pre-increment
            _fwd_iterator& operator++();
            //! post-increment
            _fwd_iterator operator++( int );

            //! pre-decrement
            _fwd_iterator& operator--();
            //! post-decrement
            _fwd_iterator operator--( int );

            //! addition operator
            _fwd_iterator operator+( difference_type n );
            //! subtraction operator
            _fwd_iterator operator-( difference_type n );

            //! addition assignment operator
            _fwd_iterator& operator+=( difference_type n );
            //! subtraction assignment operator
            _fwd_iterator& operator-=( difference_type n );

            //! dereference operator
            value_type& operator*() const;

            //! dereference at offset operator
            value_type& operator[]( difference_type n ) const;

            //! advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
            _fwd_iterator& moveNext();
            //! rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
            _fwd_iterator& movePrev();
            //! Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed)
            unicode_char getCharacter() const;
            //! Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); returns the amount of string length change caused by the operation
            int setCharacter( unicode_char uc );
        };



        //#########################################################################
        //! const forward iterator for UTFString
        class _OgreExport _const_fwd_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
        public:
            _const_fwd_iterator();
            _const_fwd_iterator( const _const_fwd_iterator& i );
            //! conversion from the non-const forward iterator
            _const_fwd_iterator( const _fwd_iterator& i );

            //! pre-increment
            _const_fwd_iterator& operator++();
            //! post-increment
            _const_fwd_iterator operator++( int );

            //! pre-decrement
            _const_fwd_iterator& operator--();
            //! post-decrement
            _const_fwd_iterator operator--( int );

            //! addition operator
            _const_fwd_iterator operator+( difference_type n );
            //! subtraction operator
            _const_fwd_iterator operator-( difference_type n );

            //! addition assignment operator
            _const_fwd_iterator& operator+=( difference_type n );
            //! subtraction assignment operator
            _const_fwd_iterator& operator-=( difference_type n );

            //! dereference operator
            const value_type& operator*() const;

            //! dereference at offset operator
            const value_type& operator[]( difference_type n ) const;

            //! advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
            _const_fwd_iterator& moveNext();
            //! rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
            _const_fwd_iterator& movePrev();
            //! Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed)
            unicode_char getCharacter() const;

            //! difference operator
            friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
            //! equality operator
            friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
            //! inequality operator
            friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
            //! less than
            friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
            //! less than or equal
            friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
            //! greater than
            friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
            //! greater than or equal
            friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );

        };

        //#########################################################################
        // REVERSE ITERATORS
        //#########################################################################
        class _const_rev_iterator; // forward declaration
        //! reverse iterator for UTFString
        class _OgreExport _rev_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
            friend class _const_rev_iterator;
        public:
            _rev_iterator();
            _rev_iterator( const _rev_iterator& i );

            //! pre-increment
            _rev_iterator& operator++();
            //! post-increment
            _rev_iterator operator++( int );

            //! pre-decrement
            _rev_iterator& operator--();
            //! post-decrement
            _rev_iterator operator--( int );

            //! addition operator
            _rev_iterator operator+( difference_type n );
            //! subtraction operator
            _rev_iterator operator-( difference_type n );

            //! addition assignment operator
            _rev_iterator& operator+=( difference_type n );
            //! subtraction assignment operator
            _rev_iterator& operator-=( difference_type n );

            //! dereference operator
            value_type& operator*() const;

            //! dereference at offset operator
            value_type& operator[]( difference_type n ) const;
        };
        //#########################################################################
        //! const reverse iterator for UTFString
        class _OgreExport _const_rev_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
        public:
            _const_rev_iterator();
            _const_rev_iterator( const _const_rev_iterator& i );
            //! conversion from the non-const reverse iterator
            _const_rev_iterator( const _rev_iterator& i );
            //! pre-increment
            _const_rev_iterator& operator++();
            //! post-increment
            _const_rev_iterator operator++( int );

            //! pre-decrement
            _const_rev_iterator& operator--();
            //! post-decrement
            _const_rev_iterator operator--( int );

            //! addition operator
            _const_rev_iterator operator+( difference_type n );
            //! subtraction operator
            _const_rev_iterator operator-( difference_type n );

            //! addition assignment operator
            _const_rev_iterator& operator+=( difference_type n );
            //! subtraction assignment operator
            _const_rev_iterator& operator-=( difference_type n );

            //! dereference operator
            const value_type& operator*() const;

            //! dereference at offset operator
            const value_type& operator[]( difference_type n ) const;

            //! difference operator
            friend size_type operator-( const _const_rev_iterator& left, const _const_rev_iterator& right );
            //! equality operator
            friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right );
            //! inequality operator
            friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right );
            //! less than
            friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right );
            //! less than or equal
            friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right );
            //! greater than
            friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right );
            //! greater than or equal
            friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right );
        };
        //#########################################################################

        typedef _fwd_iterator iterator;                     //!< iterator
        typedef _rev_iterator reverse_iterator;             //!< reverse iterator
        typedef _const_fwd_iterator const_iterator;         //!< const iterator
        typedef _const_rev_iterator const_reverse_iterator; //!< const reverse iterator


        //!\name Constructors/Destructor
        //@{
        //! default constructor, creates an empty string
        UTFString();
        //! copy constructor
        UTFString( const UTFString& copy );
        //! \a length copies of \a ch
        UTFString( size_type length, const code_point& ch );
        //! duplicate of nul-terminated sequence \a str
        UTFString( const code_point* str );
        //! duplicate of \a str, \a length code points long
        UTFString( const code_point* str, size_type length );
        //! substring of \a str starting at \a index and \a length code points long
        UTFString( const UTFString& str, size_type index, size_type length );
#if OGRE_IS_NATIVE_WCHAR_T
        //! duplicate of nul-terminated \c wchar_t array
        UTFString( const wchar_t* w_str );
        //! duplicate of \a w_str, \a length characters long
        UTFString( const wchar_t* w_str, size_type length );
#endif
        //!
//! duplicate of \a wstr
        UTFString( const std::wstring& wstr );
        //! duplicate of nul-terminated C-string \a c_str (UTF-8 encoding)
        UTFString( const char* c_str );
        //! duplicate of \a c_str, \a length characters long (UTF-8 encoding)
        UTFString( const char* c_str, size_type length );
        //! duplicate of \a str (UTF-8 encoding)
        UTFString( const std::string& str );
#if OGRE_STRING_USE_CUSTOM_MEMORY_ALLOCATOR
        //! duplicate of the Ogre::String \a str (presumably UTF-8 like the std::string overload -- TODO confirm)
        UTFString( const Ogre::String& str );
#endif

        //! destructor
        ~UTFString();
        //@}

        //////////////////////////////////////////////////////////////////////////

        //!\name Utility functions
        //@{
        //! Returns the number of code points in the current string
        size_type size() const;
        //! Returns the number of code points in the current string
        size_type length() const;
        //! Returns the number of Unicode characters in the string
        /*! Executes in linear time. */
        size_type length_Characters() const;
        //! returns the maximum number of UTF-16 code points that the string can hold
        size_type max_size() const;
        //! sets the capacity of the string to at least \a size code points
        void reserve( size_type size );
        //! changes the size of the string to \a num, filling in any new area with \a val
        void resize( size_type num, const code_point& val = 0 );
        //! exchanges the elements of the current string with those of \a from
        void swap( UTFString& from );
        //! returns \c true if the string has no elements, \c false otherwise
        bool empty() const;
        //! returns a pointer to the first character in the current string
        const code_point* c_str() const;
        //! returns a pointer to the first character in the current string
        const code_point* data() const;
        //! returns the number of elements that the string can hold before it will need to allocate more space
        size_type capacity() const;
        //! deletes all of the elements in the string
        void clear();
        //! returns a substring of the current string, starting at \a index, and \a num characters long.
        /*! If \a num is omitted, it will default to \c UTFString::npos, and the substr() function will simply return the remainder of the string starting at \a index. */
        UTFString substr( size_type index, size_type num = npos ) const;
        //! appends \a val to the end of the string
        void push_back( unicode_char val );
#if OGRE_IS_NATIVE_WCHAR_T
        //! appends \a val to the end of the string
        void push_back( wchar_t val );
#endif
        //! appends \a val to the end of the string
        /*! This can be used to push surrogate pair code points, you'll just need to push them
        one after the other. */
        void push_back( code_point val );
        //! appends \a val to the end of the string
        /*! Limited to characters under the 127 value barrier. */
        void push_back( char val );
        //! returns \c true if the given Unicode character \a ch is in this string
        bool inString( unicode_char ch ) const;
        //@}

        //////////////////////////////////////////////////////////////////////////

        //!\name Stream variations
        //@{
        //! returns the current string in UTF-8 form within a std::string
        const std::string& asUTF8() const;
        //! returns the current string in UTF-8 form as a nul-terminated char array
        const char* asUTF8_c_str() const;
        //! returns the current string in UTF-32 form within a utf32string
        const utf32string& asUTF32() const;
        //! returns the current string in UTF-32 form as a nul-terminated unicode_char array
        const unicode_char* asUTF32_c_str() const;
        //! returns the current string in the native form of std::wstring
        const std::wstring& asWStr() const;
        //!
//! returns the current string in the native form of a nul-terminated wchar_t array
        const wchar_t* asWStr_c_str() const;
        //@}

        //////////////////////////////////////////////////////////////////////////

        //!\name Single Character Access
        //@{
        //! returns a reference to the element in the string at index \c loc
        code_point& at( size_type loc );
        //! returns a reference to the element in the string at index \c loc
        const code_point& at( size_type loc ) const;
        //! returns the data point \a loc evaluated as a UTF-32 value
        /*! This function will only properly decode surrogate pairs when \a loc points to the index
        of a lead code point that is followed by a trailing code point. Evaluating the trailing code point
        itself, or pointing to a code point that is a sentinel value (part of a broken pair) will return
        the value of just that code point (not a valid Unicode value, but useful as a sentinel value). */
        unicode_char getChar( size_type loc ) const;
        //! sets the value of the character at \a loc to the Unicode value \a ch (UTF-32)
        /*! Providing sentinel values (values between U+D800-U+DFFF) are accepted, but you should be aware
        that you can also unwittingly create a valid surrogate pair if you don't pay attention to what you
        are doing. @note This operation may also lengthen the string if a surrogate pair is needed to
        represent the value given, but one is not available to replace; or alternatively shorten the string
        if an existing surrogate pair is replaced with a character that is representable without a surrogate
        pair. The return value will signify any lengthening or shortening performed, returning 0 if no change
        was made, -1 if the string was shortened, or 1 if the string was lengthened. Any single call can
        only change the string length by + or - 1.
        */
        int setChar( size_type loc, unicode_char ch );
        //@}

        //////////////////////////////////////////////////////////////////////////

        //!\name iterator acquisition
        //@{
        //! returns an iterator to the first element of the string
        iterator begin();
        //! returns an iterator to the first element of the string
        const_iterator begin() const;
        //! returns an iterator just past the end of the string
        iterator end();
        //! returns an iterator just past the end of the string
        const_iterator end() const;
        //! returns a reverse iterator to the last element of the string
        reverse_iterator rbegin();
        //! returns a reverse iterator to the last element of the string
        const_reverse_iterator rbegin() const;
        //! returns a reverse iterator just past the beginning of the string
        reverse_iterator rend();
        //! returns a reverse iterator just past the beginning of the string
        const_reverse_iterator rend() const;
        //@}

        //////////////////////////////////////////////////////////////////////////

        //!\name assign
        //@{
        //! gives the current string the values from \a start to \a end
        UTFString& assign( iterator start, iterator end );
        //! assign \a str to the current string
        UTFString& assign( const UTFString& str );
        //! assign the nul-terminated \a str to the current string
        UTFString& assign( const code_point* str );
        //! assign the first \a num characters of \a str to the current string
        UTFString& assign( const code_point* str, size_type num );
        //! assign \a len entries from \a str to the current string, starting at \a index
        UTFString& assign( const UTFString& str, size_type index, size_type len );
        //! assign \a num copies of \a ch to the current string
        UTFString& assign( size_type num, const code_point& ch );
        //!
//! assign \a wstr to the current string (\a wstr is treated as a UTF-16 stream)
        UTFString& assign( const std::wstring& wstr );
#if OGRE_IS_NATIVE_WCHAR_T
        //! assign \a w_str to the current string
        UTFString& assign( const wchar_t* w_str );
        //! assign the first \a num characters of \a w_str to the current string
        UTFString& assign( const wchar_t* w_str, size_type num );
#endif
        //! assign \a str to the current string (\a str is treated as a UTF-8 stream)
        UTFString& assign( const std::string& str );
        //! assign \a c_str to the current string (\a c_str is treated as a UTF-8 stream)
        UTFString& assign( const char* c_str );
        //! assign the first \a num characters of \a c_str to the current string (\a c_str is treated as a UTF-8 stream)
        UTFString& assign( const char* c_str, size_type num );
        //@}

        //////////////////////////////////////////////////////////////////////////

        //!\name append
        //@{
        //! appends \a str on to the end of the current string
        UTFString& append( const UTFString& str );
        //! appends \a str on to the end of the current string
        UTFString& append( const code_point* str );
        //! appends a substring of \a str starting at \a index that is \a len characters long on to the end of the current string
        UTFString& append( const UTFString& str, size_type index, size_type len );
        //! appends \a num characters of \a str on to the end of the current string
        UTFString& append( const code_point* str, size_type num );
        //! appends \a num repetitions of \a ch on to the end of the current string
        UTFString& append( size_type num, code_point ch );
        //! appends the sequence denoted by \a start and \a end on to the end of the current string
        UTFString& append( iterator start, iterator end );
#if OGRE_IS_NATIVE_WCHAR_T
        //! appends \a num characters of \a w_str on to the end of the current string
        UTFString& append( const wchar_t* w_str, size_type num );
        //! appends \a num repetitions of \a ch on to the end of the current string
        UTFString& append( size_type num, wchar_t ch );
#endif
        //! appends \a num characters of \a c_str on to the end of the current string (UTF-8 encoding)
        UTFString& append( const char* c_str, size_type num );
        //! appends \a num repetitions of \a ch on to the end of the current string (Unicode values less than 128)
        UTFString& append( size_type num, char ch );
        //! appends \a num repetitions of \a ch on to the end of the current string (Full Unicode spectrum)
        UTFString& append( size_type num, unicode_char ch );
        //@}

        //////////////////////////////////////////////////////////////////////////

        //!\name insert
        //@{
        //! inserts \a ch before the code point denoted by \a i
        iterator insert( iterator i, const code_point& ch );
        //! inserts \a str into the current string, at location \a index
        UTFString& insert( size_type index, const UTFString& str );
        //! inserts \a str into the current string, at location \a index
        /*! Defined inline: forwards to mData.insert( index, str ) and returns *this. */
        UTFString& insert( size_type index, const code_point* str ) {
            mData.insert( index, str );
            return *this;
        }
        //! inserts a substring of \a str (starting at \a index2 and \a num code points long) into the current string, at location \a index1
        UTFString& insert( size_type index1, const UTFString& str, size_type index2, size_type num );
        //! inserts the code points denoted by \a start and \a end into the current string, before the code point specified by \a i
        void insert( iterator i, iterator start, iterator end );
        //! inserts \a num code points of \a str into the current string, at location \a index
        UTFString& insert( size_type index, const code_point* str, size_type num );
#if OGRE_IS_NATIVE_WCHAR_T
        //! inserts \a num code points of \a w_str into the current string, at location \a index
        UTFString& insert( size_type index, const wchar_t* w_str, size_type num );
#endif
        //! inserts \a num code points of \a c_str into the current string, at location \a index
        UTFString& insert( size_type index, const char* c_str, size_type num );
        //! inserts \a num copies of \a ch into the current string, at location \a index
        UTFString& insert( size_type index, size_type num, code_point ch );
#if OGRE_IS_NATIVE_WCHAR_T
        //! inserts \a num copies of \a ch into the current string, at location \a index
        UTFString& insert( size_type index, size_type num, wchar_t ch );
#endif
        //! inserts \a num copies of \a ch into the current string, at location \a index
        UTFString& insert( size_type index, size_type num, char ch );
        //! inserts \a num copies of \a ch into the current string, at location \a index
        UTFString& insert( size_type index, size_type num, unicode_char ch );
        //! inserts \a num copies of \a ch into the current string, before the code point denoted by \a i
        void insert( iterator i, size_type num, const code_point& ch );
#if OGRE_IS_NATIVE_WCHAR_T
        //! inserts \a num copies of \a ch into the current string, before the code point denoted by \a i
        void insert( iterator i, size_type num, const wchar_t& ch );
#endif
        //! inserts \a num copies of \a ch into the current string, before the code point denoted by \a i
        void insert( iterator i, size_type num, const char& ch );
        //! inserts \a num copies of \a ch into the current string, before the code point denoted by \a i
        void insert( iterator i, size_type num, const unicode_char& ch );
        //@}

        //////////////////////////////////////////////////////////////////////////

        //!\name erase
        //@{
        //! removes the code point pointed to by \a loc, returning an iterator to the next character
        iterator erase( iterator loc );
        //!
removes the code points between \a start and \a end (including the one at \a start but not the one at \a end), returning an iterator to the code point after the last code point removed 706 iterator erase( iterator start, iterator end ); 707 //! removes \a num code points from the current string, starting at \a index 708 UTFString& erase( size_type index = 0, size_type num = npos ); 709 //@} 710 711 ////////////////////////////////////////////////////////////////////////// 712 713 //!\name replace 714 //@{ 715 //! replaces up to \a num1 code points of the current string (starting at \a index1) with \a str 716 UTFString& replace( size_type index1, size_type num1, const UTFString& str ); 717 //! replaces up to \a num1 code points of the current string (starting at \a index1) with up to \a num2 code points from \a str 718 UTFString& replace( size_type index1, size_type num1, const UTFString& str, size_type num2 ); 719 //! replaces up to \a num1 code points of the current string (starting at \a index1) with up to \a num2 code points from \a str beginning at \a index2 720 UTFString& replace( size_type index1, size_type num1, const UTFString& str, size_type index2, size_type num2 ); 721 //! replaces code points in the current string from \a start to \a end with \a num code points from \a str 722 UTFString& replace( iterator start, iterator end, const UTFString& str, size_type num = npos ); 723 //! replaces up to \a num1 code points in the current string (beginning at \a index) with \c num2 copies of \c ch 724 UTFString& replace( size_type index, size_type num1, size_type num2, code_point ch ); 725 //! replaces the code points in the current string from \a start to \a end with \a num copies of \a ch 726 UTFString& replace( iterator start, iterator end, size_type num, code_point ch ); 727 //@} 728 729 ////////////////////////////////////////////////////////////////////////// 730 731 //!\name compare 732 //@{ 733 //! 
compare \a str to the current string 734 int compare( const UTFString& str ) const; 735 //! compare \a str to the current string 736 int compare( const code_point* str ) const; 737 //! compare \a str to a substring of the current string, starting at \a index for \a length characters 738 int compare( size_type index, size_type length, const UTFString& str ) const; 739 //! compare a substring of \a str to a substring of the current string, where \a index2 and \a length2 refer to \a str and \a index and \a length refer to the current string 740 int compare( size_type index, size_type length, const UTFString& str, size_type index2, size_type length2 ) const; 741 //! compare a substring of \a str to a substring of the current string, where the substring of \a str begins at zero and is \a length2 characters long, and the substring of the current string begins at \a index and is \a length characters long 742 int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const; 743 #if OGRE_IS_NATIVE_WCHAR_T 744 //! compare a substring of \a str to a substring of the current string, where the substring of \a str begins at zero and is \a length2 elements long, and the substring of the current string begins at \a index and is \a length characters long 745 int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const; 746 #endif 747 //! compare a substring of \a str to a substring of the current string, where the substring of \a str begins at zero and is \a length2 <b>UTF-8 code points</b> long, and the substring of the current string begins at \a index and is \a length characters long 748 int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const; 749 //@} 750 751 ////////////////////////////////////////////////////////////////////////// 752 753 //!\name find & rfind 754 //@{ 755 //! 
returns the index of the first occurrence of \a str within the current string, starting at \a index; returns \c UTFString::npos if nothing is found 756 /*! \a str is a UTF-16 encoded string, but through implicit casting can also be a UTF-8 encoded string (const char* or std::string) */ 757 size_type find( const UTFString& str, size_type index = 0 ) const; 758 //! returns the index of the first occurrence of \a str within the current string and within \a length code points, starting at \a index; returns \c UTFString::npos if nothing is found 759 /*! \a cp_str is a UTF-16 encoded string */ 760 size_type find( const code_point* cp_str, size_type index, size_type length ) const; 761 //! returns the index of the first occurrence of \a str within the current string and within \a length code points, starting at \a index; returns \c UTFString::npos if nothing is found 762 /*! \a cp_str is a UTF-8 encoded string */ 763 size_type find( const char* c_str, size_type index, size_type length ) const; 764 #if OGRE_IS_NATIVE_WCHAR_T 765 //! returns the index of the first occurrence of \a str within the current string and within \a length code points, starting at \a index; returns \c UTFString::npos if nothing is found 766 /*! \a cp_str is a UTF-16 encoded string */ 767 size_type find( const wchar_t* w_str, size_type index, size_type length ) const; 768 #endif 769 //! returns the index of the first occurrence \a ch within the current string, starting at \a index; returns \c UTFString::npos if nothing is found 770 /*! \a ch is only capable of representing Unicode values up to U+007F (127) */ 771 size_type find( char ch, size_type index = 0 ) const; 772 //! returns the index of the first occurrence \a ch within the current string, starting at \a index; returns \c UTFString::npos if nothing is found 773 /*! \a ch is only capable of representing Unicode values up to U+FFFF (65535) */ 774 size_type find( code_point ch, size_type index = 0 ) const; 775 #if OGRE_IS_NATIVE_WCHAR_T 776 //! 
returns the index of the first occurrence \a ch within the current string, starting at \a index; returns \c UTFString::npos if nothing is found 777 /*! \a ch is only capable of representing Unicode values up to U+FFFF (65535) */ 778 size_type find( wchar_t ch, size_type index = 0 ) const; 779 #endif 780 //! returns the index of the first occurrence \a ch within the current string, starting at \a index; returns \c UTFString::npos if nothing is found 781 /*! \a ch can fully represent any Unicode character */ 782 size_type find( unicode_char ch, size_type index = 0 ) const; 783 784 //! returns the location of the first occurrence of \a str in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 785 size_type rfind( const UTFString& str, size_type index = 0 ) const; 786 //! returns the location of the first occurrence of \a str in the current string, doing a reverse search from \a index, searching at most \a num characters; returns \c UTFString::npos if nothing is found 787 size_type rfind( const code_point* cp_str, size_type index, size_type num ) const; 788 //! returns the location of the first occurrence of \a str in the current string, doing a reverse search from \a index, searching at most \a num characters; returns \c UTFString::npos if nothing is found 789 size_type rfind( const char* c_str, size_type index, size_type num ) const; 790 #if OGRE_IS_NATIVE_WCHAR_T 791 //! returns the location of the first occurrence of \a str in the current string, doing a reverse search from \a index, searching at most \a num characters; returns \c UTFString::npos if nothing is found 792 size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const; 793 #endif 794 //! returns the location of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 795 size_type rfind( char ch, size_type index = 0 ) const; 796 //! 
returns the location of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 797 size_type rfind( code_point ch, size_type index ) const; 798 #if OGRE_IS_NATIVE_WCHAR_T 799 //! returns the location of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 800 size_type rfind( wchar_t ch, size_type index = 0 ) const; 801 #endif 802 //! returns the location of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 803 size_type rfind( unicode_char ch, size_type index = 0 ) const; 804 //@} 805 806 ////////////////////////////////////////////////////////////////////////// 807 808 //!\name find_first/last_(not)_of 809 //@{ 810 //! Returns the index of the first character within the current string that matches \b any character in \a str, beginning the search at \a index and searching at most \a num characters; returns \c UTFString::npos if nothing is found 811 size_type find_first_of( const UTFString &str, size_type index = 0, size_type num = npos ) const; 812 //! returns the index of the first occurrence of \a ch in the current string, starting the search at \a index; returns \c UTFString::npos if nothing is found 813 size_type find_first_of( code_point ch, size_type index = 0 ) const; 814 //! returns the index of the first occurrence of \a ch in the current string, starting the search at \a index; returns \c UTFString::npos if nothing is found 815 size_type find_first_of( char ch, size_type index = 0 ) const; 816 #if OGRE_IS_NATIVE_WCHAR_T 817 //! returns the index of the first occurrence of \a ch in the current string, starting the search at \a index; returns \c UTFString::npos if nothing is found 818 size_type find_first_of( wchar_t ch, size_type index = 0 ) const; 819 #endif 820 //! 
returns the index of the first occurrence of \a ch in the current string, starting the search at \a index; returns \c UTFString::npos if nothing is found 821 size_type find_first_of( unicode_char ch, size_type index = 0 ) const; 822 823 //! returns the index of the first character within the current string that does not match any character in \a str, beginning the search at \a index and searching at most \a num characters; returns \c UTFString::npos if nothing is found 824 size_type find_first_not_of( const UTFString& str, size_type index = 0, size_type num = npos ) const; 825 //! returns the index of the first character within the current string that does not match \a ch, starting the search at \a index; returns \c UTFString::npos if nothing is found 826 size_type find_first_not_of( code_point ch, size_type index = 0 ) const; 827 //! returns the index of the first character within the current string that does not match \a ch, starting the search at \a index; returns \c UTFString::npos if nothing is found 828 size_type find_first_not_of( char ch, size_type index = 0 ) const; 829 #if OGRE_IS_NATIVE_WCHAR_T 830 //! returns the index of the first character within the current string that does not match \a ch, starting the search at \a index; returns \c UTFString::npos if nothing is found 831 size_type find_first_not_of( wchar_t ch, size_type index = 0 ) const; 832 #endif 833 //! returns the index of the first character within the current string that does not match \a ch, starting the search at \a index; returns \c UTFString::npos if nothing is found 834 size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const; 835 836 //! 
returns the index of the first character within the current string that matches any character in \a str, doing a reverse search from \a index and searching at most \a num characters; returns \c UTFString::npos if nothing is found 837 size_type find_last_of( const UTFString& str, size_type index = npos, size_type num = npos ) const; 838 //! returns the index of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 839 size_type find_last_of( code_point ch, size_type index = npos ) const; 840 //! returns the index of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 841 size_type find_last_of( char ch, size_type index = npos ) const { 842 return find_last_of( static_cast<code_point>( ch ), index ); 843 } 844 #if OGRE_IS_NATIVE_WCHAR_T 845 //! returns the index of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 846 size_type find_last_of( wchar_t ch, size_type index = npos ) const; 847 #endif 848 //! returns the index of the first occurrence of \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 849 size_type find_last_of( unicode_char ch, size_type index = npos ) const; 850 851 //! returns the index of the last character within the current string that does not match any character in \a str, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 852 size_type find_last_not_of( const UTFString& str, size_type index = npos, size_type num = npos ) const; 853 //! 
returns the index of the last occurrence of a character that does not match \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 854 size_type find_last_not_of( code_point ch, size_type index = npos ) const; 855 //! returns the index of the last occurrence of a character that does not match \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 856 size_type find_last_not_of( char ch, size_type index = npos ) const; 857 #if OGRE_IS_NATIVE_WCHAR_T 858 //! returns the index of the last occurrence of a character that does not match \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 859 size_type find_last_not_of( wchar_t ch, size_type index = npos ) const; 860 #endif 861 //! returns the index of the last occurrence of a character that does not match \a ch in the current string, doing a reverse search from \a index; returns \c UTFString::npos if nothing is found 862 size_type find_last_not_of( unicode_char ch, size_type index = npos ) const; 863 //@} 864 865 ////////////////////////////////////////////////////////////////////////// 866 867 //!\name Operators 868 //@{ 869 //! less than operator 870 bool operator<( const UTFString& right ) const; 871 //! less than or equal operator 872 bool operator<=( const UTFString& right ) const; 873 //! greater than operator 874 bool operator>( const UTFString& right ) const; 875 //! greater than or equal operator 876 bool operator>=( const UTFString& right ) const; 877 //! equality operator 878 bool operator==( const UTFString& right ) const; 879 //! inequality operator 880 bool operator!=( const UTFString& right ) const; 881 //! assignment operator, implicitly casts all compatible types 882 UTFString& operator=( const UTFString& s ); 883 //! assignment operator 884 UTFString& operator=( code_point ch ); 885 //! 
assignment operator 886 UTFString& operator=( char ch ); 887 #if OGRE_IS_NATIVE_WCHAR_T 888 //! assignment operator 889 UTFString& operator=( wchar_t ch ); 890 #endif 891 //! assignment operator 892 UTFString& operator=( unicode_char ch ); 893 //! code point dereference operator 894 code_point& operator[]( size_type index ); 895 //! code point dereference operator 896 const code_point& operator[]( size_type index ) const; 897 //@} 898 899 ////////////////////////////////////////////////////////////////////////// 900 901 //!\name Implicit Cast Operators 902 //@{ 903 //! implicit cast to std::string 904 operator std::string() const; 905 //! implicit cast to std::wstring 906 operator std::wstring() const; 907 #if OGRE_STRING_USE_CUSTOM_MEMORY_ALLOCATOR 908 //! implicit cast to Ogre::String 909 operator Ogre::String() const; 910 #endif 911 //@} 912 913 ////////////////////////////////////////////////////////////////////////// 914 915 //!\name UTF-16 character encoding/decoding 916 //@{ 917 //! returns \c true if \a cp does not match the signature for the lead of follow code point of a surrogate pair in a UTF-16 sequence 918 static bool _utf16_independent_char( code_point cp ); 919 //! returns \c true if \a cp matches the signature of a surrogate pair lead character 920 static bool _utf16_surrogate_lead( code_point cp ); 921 //! returns \c true if \a cp matches the signature of a surrogate pair following character 922 static bool _utf16_surrogate_follow( code_point cp ); 923 //! estimates the number of UTF-16 code points in the sequence starting with \a cp 924 static size_t _utf16_char_length( code_point cp ); 925 //! returns the number of UTF-16 code points needed to represent the given UTF-32 character \a cp 926 static size_t _utf16_char_length( unicode_char uc ); 927 //! 
converts the given UTF-16 character buffer \a in_cp to a single UTF-32 Unicode character \a out_uc, returns the number of code points used to create the output character (2 for surrogate pairs, otherwise 1) 928 /*! This function does it's best to prevent error conditions, verifying complete 929 surrogate pairs before applying the algorithm. In the event that half of a pair 930 is found it will happily generate a value in the 0xD800 - 0xDFFF range, which is 931 normally an invalid Unicode value but we preserve them for use as sentinel values. */ 932 static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc ); 933 //! writes the given UTF-32 \a uc_in to the buffer location \a out_cp using UTF-16 encoding, returns the number of code points used to encode the input (always 1 or 2) 934 /*! This function, like its counterpart, will happily create invalid UTF-16 surrogate pairs. These 935 invalid entries will be created for any value of \c in_uc that falls in the range U+D800 - U+DFFF. 936 These are generally useful as sentinel values to represent various program specific conditions. 937 @note This function will also pass through any single UTF-16 code point without modification, 938 making it a safe method of ensuring a stream that is unknown UTF-32 or UTF-16 is truly UTF-16.*/ 939 static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] ); 940 //@} 941 942 ////////////////////////////////////////////////////////////////////////// 943 944 //!\name UTF-8 character encoding/decoding 945 //@{ 946 //! returns \c true if \a cp is the beginning of a UTF-8 sequence 947 static bool _utf8_start_char( unsigned char cp ); 948 //! estimates the number of UTF-8 code points in the sequence starting with \a cp 949 static size_t _utf8_char_length( unsigned char cp ); 950 //! returns the number of UTF-8 code points needed to represent the given UTF-32 character \a cp 951 static size_t _utf8_char_length( unicode_char uc ); 952 953 //! 
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of bytes used to create the output character (maximum of 6) 954 static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc ); 955 //! writes the given UTF-32 \a uc_in to the buffer location \a out_cp using UTF-8 encoding, returns the number of bytes used to encode the input 956 static size_t _utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] ); 957 958 //! verifies a UTF-8 stream, returning the total number of Unicode characters found 959 static size_type _verifyUTF8( const unsigned char* c_str ); 960 //! verifies a UTF-8 stream, returning the total number of Unicode characters found 961 static size_type _verifyUTF8( const std::string& str ); 962 //@} 963 964 private: 965 //template<class ITER_TYPE> friend class _iterator; 966 dstring mData; 967 968 //! buffer data type identifier 969 enum BufferType { 970 bt_none, 971 bt_string, 972 bt_wstring, 973 bt_utf32string 974 }; 975 976 //! common constructor operations 977 void _init(); 978 979 /////////////////////////////////////////////////////////////////////// 980 // Scratch buffer 981 //! auto cleans the scratch buffer using the proper delete for the stored type 982 void _cleanBuffer() const; 983 984 //! create a std::string in the scratch buffer area 985 void _getBufferStr() const; 986 //! create a std::wstring in the scratch buffer area 987 void _getBufferWStr() const; 988 //! 
create a utf32string in the scratch buffer area 989 void _getBufferUTF32Str() const; 990 991 void _load_buffer_UTF8() const; 992 void _load_buffer_WStr() const; 993 void _load_buffer_UTF32() const; 994 995 mutable BufferType mBufferType; // identifies the data type held in mBuffer 996 mutable size_t mBufferSize; // size of the CString buffer 997 998 // multi-purpose buffer used everywhere we need a throw-away buffer 999 union { 1000 mutable void* mVoidBuffer; 1001 mutable std::string* mStrBuffer; 1002 mutable std::wstring* mWStrBuffer; 1003 mutable utf32string* mUTF32StrBuffer; 1004 } 1005 mBuffer; 1006 }; 1007 1008 //! string addition operator \relates UTFString 1009 inline UTFString operator+( const UTFString& s1, const UTFString& s2 ) { 1010 return UTFString( s1 ).append( s2 ); 1011 } 1012 //! string addition operator \relates UTFString 1013 inline UTFString operator+( const UTFString& s1, UTFString::code_point c ) { 1014 return UTFString( s1 ).append( 1, c ); 1015 } 1016 //! string addition operator \relates UTFString 1017 inline UTFString operator+( const UTFString& s1, UTFString::unicode_char c ) { 1018 return UTFString( s1 ).append( 1, c ); 1019 } 1020 //! string addition operator \relates UTFString 1021 inline UTFString operator+( const UTFString& s1, char c ) { 1022 return UTFString( s1 ).append( 1, c ); 1023 } 1024 #if OGRE_IS_NATIVE_WCHAR_T 1025 //! string addition operator \relates UTFString 1026 inline UTFString operator+( const UTFString& s1, wchar_t c ) { 1027 return UTFString( s1 ).append( 1, c ); 1028 } 1029 #endif 1030 //! string addition operator \relates UTFString 1031 inline UTFString operator+( UTFString::code_point c, const UTFString& s2 ) { 1032 return UTFString().append( 1, c ).append( s2 ); 1033 } 1034 //! string addition operator \relates UTFString 1035 inline UTFString operator+( UTFString::unicode_char c, const UTFString& s2 ) { 1036 return UTFString().append( 1, c ).append( s2 ); 1037 } 1038 //! 
string addition operator \relates UTFString 1039 inline UTFString operator+( char c, const UTFString& s2 ) { 1040 return UTFString().append( 1, c ).append( s2 ); 1041 } 1042 #if OGRE_IS_NATIVE_WCHAR_T 1043 //! string addition operator \relates UTFString 1044 inline UTFString operator+( wchar_t c, const UTFString& s2 ) { 1045 return UTFString().append( 1, c ).append( s2 ); 1046 } 1047 #endif 1048 1049 // (const) forward iterator common operators 1050 inline UTFString::size_type operator-( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 1051 return ( left.mIter - right.mIter ); 1052 } 1053 inline bool operator==( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 1054 return left.mIter == right.mIter; 1055 } 1056 inline bool operator!=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 1057 return left.mIter != right.mIter; 1058 } 1059 inline bool operator<( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 1060 return left.mIter < right.mIter; 1061 } 1062 inline bool operator<=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 1063 return left.mIter <= right.mIter; 1064 } 1065 inline bool operator>( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 1066 return left.mIter > right.mIter; 1067 } 1068 inline bool operator>=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 1069 return left.mIter >= right.mIter; 1070 } 1071 1072 // (const) reverse iterator common operators 1073 // NB: many of these operations are evaluated in reverse because this is a reverse iterator wrapping a forward iterator 1074 inline UTFString::size_type operator-( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 1075 return ( right.mIter - left.mIter ); 1076 } 
1077 inline bool operator==( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 1078 return left.mIter == right.mIter; 1079 } 1080 inline bool operator!=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 1081 return left.mIter != right.mIter; 1082 } 1083 inline bool operator<( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 1084 return right.mIter < left.mIter; 1085 } 1086 inline bool operator<=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 1087 return right.mIter <= left.mIter; 1088 } 1089 inline bool operator>( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 1090 return right.mIter > left.mIter; 1091 } 1092 inline bool operator>=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 1093 return right.mIter >= left.mIter; 1094 } 1095 1096 //! std::ostream write operator \relates UTFString 1097 inline std::ostream& operator << ( std::ostream& os, const UTFString& s ) { 1098 return os << s.asUTF8(); 1099 } 1100 1101 //! std::wostream write operator \relates UTFString 1102 inline std::wostream& operator << ( std::wostream& os, const UTFString& s ) { 1103 return os << s.asWStr(); 1104 } 1105 1106 /** @} */ 1107 /** @} */ 1108 1109 1110 } // namespace Ogre{ 1111 1112 #endif // OGRE_UNICODE_SUPPORT 1113 1114 #include "OgreHeaderSuffix.h" 1115 1116 #endif 1117