1 /*
2  * This source file is part of MyGUI. For the latest info, see http://mygui.info/
3  * Distributed under the MIT License
4  * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT)
5  */
6 
7 #include "MyGUI_Precompiled.h"
8 #include "MyGUI_UString.h"
9 
10 namespace MyGUI
11 {
12 
13 	//--------------------------------------------------------------------------
_base_iterator()14 	UString::_base_iterator::_base_iterator()
15 	{
16 		mString = nullptr;
17 	}
18 	//--------------------------------------------------------------------------
_seekFwd(size_type c)19 	void UString::_base_iterator::_seekFwd( size_type c )
20 	{
21 		mIter += c;
22 	}
23 	//--------------------------------------------------------------------------
_seekRev(size_type c)24 	void UString::_base_iterator::_seekRev( size_type c )
25 	{
26 		mIter -= c;
27 	}
28 	//--------------------------------------------------------------------------
_become(const _base_iterator & i)29 	void UString::_base_iterator::_become( const _base_iterator& i )
30 	{
31 		mIter = i.mIter;
32 		mString = i.mString;
33 	}
34 	//--------------------------------------------------------------------------
_test_begin() const35 	bool UString::_base_iterator::_test_begin() const
36 	{
37 		return mIter == mString->mData.begin();
38 	}
39 	//--------------------------------------------------------------------------
_test_end() const40 	bool UString::_base_iterator::_test_end() const
41 	{
42 		return mIter == mString->mData.end();
43 	}
44 	//--------------------------------------------------------------------------
_get_index() const45 	UString::size_type UString::_base_iterator::_get_index() const
46 	{
47 		return mIter - mString->mData.begin();
48 	}
49 	//--------------------------------------------------------------------------
_jump_to(size_type index)50 	void UString::_base_iterator::_jump_to( size_type index )
51 	{
52 		mIter = mString->mData.begin() + index;
53 	}
54 	//--------------------------------------------------------------------------
_getCharacter() const55 	UString::unicode_char UString::_base_iterator::_getCharacter() const
56 	{
57 		size_type current_index = _get_index();
58 		return mString->getChar( current_index );
59 	}
60 	//--------------------------------------------------------------------------
_setCharacter(unicode_char uc)61 	int UString::_base_iterator::_setCharacter( unicode_char uc )
62 	{
63 		size_type current_index = _get_index();
64 		int change = mString->setChar( current_index, uc );
65 		_jump_to( current_index );
66 		return change;
67 	}
68 	//--------------------------------------------------------------------------
_moveNext()69 	void UString::_base_iterator::_moveNext()
70 	{
71 		_seekFwd( 1 ); // move 1 code point forward
72 		if ( _test_end() ) return; // exit if we hit the end
73 		if ( _utf16_surrogate_follow( mIter[0] ) ) {
74 			// landing on a follow code point means we might be part of a bigger character
75 			// so we test for that
76 			code_point lead_half = 0;
77 			//NB: we can't possibly be at the beginning here, so no need to test
78 			lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
79 			if ( _utf16_surrogate_lead( lead_half ) ) {
80 				_seekFwd( 1 ); // if so, then advance 1 more code point
81 			}
82 		}
83 	}
84 	//--------------------------------------------------------------------------
_movePrev()85 	void UString::_base_iterator::_movePrev()
86 	{
87 		_seekRev( 1 ); // move 1 code point backwards
88 		if ( _test_begin() ) return; // exit if we hit the beginning
89 		if ( _utf16_surrogate_follow( mIter[0] ) ) {
90 			// landing on a follow code point means we might be part of a bigger character
91 			// so we test for that
92 			code_point lead_half = 0;
93 			lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
94 			if ( _utf16_surrogate_lead( lead_half ) ) {
95 				_seekRev( 1 ); // if so, then rewind 1 more code point
96 			}
97 		}
98 	}
99 	//--------------------------------------------------------------------------
100 	//--------------------------------------------------------------------------
101 	//--------------------------------------------------------------------------
102 	//--------------------------------------------------------------------------
103 	UString::_fwd_iterator::_fwd_iterator() = default;
104 	//--------------------------------------------------------------------------
_fwd_iterator(const _fwd_iterator & i)105 	UString::_fwd_iterator::_fwd_iterator( const _fwd_iterator& i )
106 	{
107 		_become( i );
108 	}
109 	//--------------------------------------------------------------------------
operator =(const _fwd_iterator & i)110 	UString::_fwd_iterator& UString::_fwd_iterator::operator=( const _fwd_iterator& i )
111 	{
112 		_become( i );
113 		return *this;
114 	}
115 	//--------------------------------------------------------------------------
operator ++()116 	UString::_fwd_iterator& UString::_fwd_iterator::operator++()
117 	{
118 		_seekFwd( 1 );
119 		return *this;
120 	}
121 	//--------------------------------------------------------------------------
operator ++(int)122 	UString::_fwd_iterator UString::_fwd_iterator::operator++( int )
123 	{
124 		_fwd_iterator tmp( *this );
125 		_seekFwd( 1 );
126 		return tmp;
127 	}
128 	//--------------------------------------------------------------------------
operator --()129 	UString::_fwd_iterator& UString::_fwd_iterator::operator--()
130 	{
131 		_seekRev( 1 );
132 		return *this;
133 	}
134 	//--------------------------------------------------------------------------
operator --(int)135 	UString::_fwd_iterator UString::_fwd_iterator::operator--( int )
136 	{
137 		_fwd_iterator tmp( *this );
138 		_seekRev( 1 );
139 		return tmp;
140 	}
141 	//--------------------------------------------------------------------------
operator +(difference_type n)142 	UString::_fwd_iterator UString::_fwd_iterator::operator+( difference_type n )
143 	{
144 		_fwd_iterator tmp( *this );
145 		if ( n < 0 )
146 			tmp._seekRev( -n );
147 		else
148 			tmp._seekFwd( n );
149 		return tmp;
150 	}
151 	//--------------------------------------------------------------------------
operator -(difference_type n)152 	UString::_fwd_iterator UString::_fwd_iterator::operator-( difference_type n )
153 	{
154 		_fwd_iterator tmp( *this );
155 		if ( n < 0 )
156 			tmp._seekFwd( -n );
157 		else
158 			tmp._seekRev( n );
159 		return tmp;
160 	}
161 	//--------------------------------------------------------------------------
operator +=(difference_type n)162 	UString::_fwd_iterator& UString::_fwd_iterator::operator+=( difference_type n )
163 	{
164 		if ( n < 0 )
165 			_seekRev( -n );
166 		else
167 			_seekFwd( n );
168 		return *this;
169 	}
170 	//--------------------------------------------------------------------------
operator -=(difference_type n)171 	UString::_fwd_iterator& UString::_fwd_iterator::operator-=( difference_type n )
172 	{
173 		if ( n < 0 )
174 			_seekFwd( -n );
175 		else
176 			_seekRev( n );
177 		return *this;
178 	}
179 	//--------------------------------------------------------------------------
operator *() const180 	UString::value_type& UString::_fwd_iterator::operator*() const
181 	{
182 		return *mIter;
183 	}
184 	//--------------------------------------------------------------------------
operator [](difference_type n) const185 	UString::value_type& UString::_fwd_iterator::operator[]( difference_type n ) const
186 	{
187 		_fwd_iterator tmp( *this );
188 		tmp += n;
189 		return *tmp;
190 	}
191 	//--------------------------------------------------------------------------
moveNext()192 	UString::_fwd_iterator& UString::_fwd_iterator::moveNext()
193 	{
194 		_moveNext();
195 		return *this;
196 	}
197 	//--------------------------------------------------------------------------
movePrev()198 	UString::_fwd_iterator& UString::_fwd_iterator::movePrev()
199 	{
200 		_movePrev();
201 		return *this;
202 	}
203 	//--------------------------------------------------------------------------
getCharacter() const204 	UString::unicode_char UString::_fwd_iterator::getCharacter() const
205 	{
206 		return _getCharacter();
207 	}
208 	//--------------------------------------------------------------------------
setCharacter(unicode_char uc)209 	int UString::_fwd_iterator::setCharacter( unicode_char uc )
210 	{
211 		return _setCharacter( uc );
212 	}
213 	//--------------------------------------------------------------------------
214 	//--------------------------------------------------------------------------
215 	//--------------------------------------------------------------------------
216 	//--------------------------------------------------------------------------
217 	UString::_const_fwd_iterator::_const_fwd_iterator() = default;
218 	//--------------------------------------------------------------------------
_const_fwd_iterator(const _const_fwd_iterator & i)219 	UString::_const_fwd_iterator::_const_fwd_iterator( const _const_fwd_iterator& i )
220 	{
221 		_become( i );
222 	}
223 	//--------------------------------------------------------------------------
operator =(const _const_fwd_iterator & i)224 	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator=( const _const_fwd_iterator& i )
225 	{
226 		_become( i );
227 		return *this;
228 	}
229 	//--------------------------------------------------------------------------
_const_fwd_iterator(const _fwd_iterator & i)230 	UString::_const_fwd_iterator::_const_fwd_iterator( const _fwd_iterator& i )
231 	{
232 		_become( i );
233 	}
234 	//--------------------------------------------------------------------------
operator ++()235 	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator++()
236 	{
237 		_seekFwd( 1 );
238 		return *this;
239 	}
240 	//--------------------------------------------------------------------------
operator ++(int)241 	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator++( int )
242 	{
243 		_const_fwd_iterator tmp( *this );
244 		_seekFwd( 1 );
245 		return tmp;
246 	}
247 	//--------------------------------------------------------------------------
operator --()248 	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator--()
249 	{
250 		_seekRev( 1 );
251 		return *this;
252 	}
253 	//--------------------------------------------------------------------------
operator --(int)254 	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator--( int )
255 	{
256 		_const_fwd_iterator tmp( *this );
257 		_seekRev( 1 );
258 		return tmp;
259 	}
260 	//--------------------------------------------------------------------------
operator +(difference_type n)261 	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator+( difference_type n )
262 	{
263 		_const_fwd_iterator tmp( *this );
264 		if ( n < 0 )
265 			tmp._seekRev( -n );
266 		else
267 			tmp._seekFwd( n );
268 		return tmp;
269 	}
270 	//--------------------------------------------------------------------------
operator -(difference_type n)271 	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator-( difference_type n )
272 	{
273 		_const_fwd_iterator tmp( *this );
274 		if ( n < 0 )
275 			tmp._seekFwd( -n );
276 		else
277 			tmp._seekRev( n );
278 		return tmp;
279 	}
280 	//--------------------------------------------------------------------------
operator +=(difference_type n)281 	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator+=( difference_type n )
282 	{
283 		if ( n < 0 )
284 			_seekRev( -n );
285 		else
286 			_seekFwd( n );
287 		return *this;
288 	}
289 	//--------------------------------------------------------------------------
operator -=(difference_type n)290 	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator-=( difference_type n )
291 	{
292 		if ( n < 0 )
293 			_seekFwd( -n );
294 		else
295 			_seekRev( n );
296 		return *this;
297 	}
298 	//--------------------------------------------------------------------------
operator *() const299 	const UString::value_type& UString::_const_fwd_iterator::operator*() const
300 	{
301 		return *mIter;
302 	}
303 	//--------------------------------------------------------------------------
operator [](difference_type n) const304 	const UString::value_type& UString::_const_fwd_iterator::operator[]( difference_type n ) const
305 	{
306 		_const_fwd_iterator tmp( *this );
307 		tmp += n;
308 		return *tmp;
309 	}
310 	//--------------------------------------------------------------------------
moveNext()311 	UString::_const_fwd_iterator& UString::_const_fwd_iterator::moveNext()
312 	{
313 		_moveNext();
314 		return *this;
315 	}
316 	//--------------------------------------------------------------------------
movePrev()317 	UString::_const_fwd_iterator& UString::_const_fwd_iterator::movePrev()
318 	{
319 		_movePrev();
320 		return *this;
321 	}
322 	//--------------------------------------------------------------------------
getCharacter() const323 	UString::unicode_char UString::_const_fwd_iterator::getCharacter() const
324 	{
325 		return _getCharacter();
326 	}
327 	//--------------------------------------------------------------------------
328 	//--------------------------------------------------------------------------
329 	//--------------------------------------------------------------------------
330 	//--------------------------------------------------------------------------
331 	UString::_rev_iterator::_rev_iterator() = default;
332 	//--------------------------------------------------------------------------
_rev_iterator(const _rev_iterator & i)333 	UString::_rev_iterator::_rev_iterator( const _rev_iterator& i )
334 	{
335 		_become( i );
336 	}
337 	//--------------------------------------------------------------------------
operator ++()338 	UString::_rev_iterator& UString::_rev_iterator::operator++()
339 	{
340 		_seekRev( 1 );
341 		return *this;
342 	}
343 	//--------------------------------------------------------------------------
operator ++(int)344 	UString::_rev_iterator UString::_rev_iterator::operator++( int )
345 	{
346 		_rev_iterator tmp( *this );
347 		_seekRev( 1 );
348 		return tmp;
349 	}
350 	//--------------------------------------------------------------------------
operator --()351 	UString::_rev_iterator& UString::_rev_iterator::operator--()
352 	{
353 		_seekFwd( 1 );
354 		return *this;
355 	}
356 	//--------------------------------------------------------------------------
operator --(int)357 	UString::_rev_iterator UString::_rev_iterator::operator--( int )
358 	{
359 		_rev_iterator tmp( *this );
360 		_seekFwd( 1 );
361 		return tmp;
362 	}
363 	//--------------------------------------------------------------------------
operator +(difference_type n)364 	UString::_rev_iterator UString::_rev_iterator::operator+( difference_type n )
365 	{
366 		_rev_iterator tmp( *this );
367 		if ( n < 0 )
368 			tmp._seekFwd( -n );
369 		else
370 			tmp._seekRev( n );
371 		return tmp;
372 	}
373 	//--------------------------------------------------------------------------
operator -(difference_type n)374 	UString::_rev_iterator UString::_rev_iterator::operator-( difference_type n )
375 	{
376 		_rev_iterator tmp( *this );
377 		if ( n < 0 )
378 			tmp._seekRev( -n );
379 		else
380 			tmp._seekFwd( n );
381 		return tmp;
382 	}
383 	//--------------------------------------------------------------------------
operator +=(difference_type n)384 	UString::_rev_iterator& UString::_rev_iterator::operator+=( difference_type n )
385 	{
386 		if ( n < 0 )
387 			_seekFwd( -n );
388 		else
389 			_seekRev( n );
390 		return *this;
391 	}
392 	//--------------------------------------------------------------------------
operator -=(difference_type n)393 	UString::_rev_iterator& UString::_rev_iterator::operator-=( difference_type n )
394 	{
395 		if ( n < 0 )
396 			_seekRev( -n );
397 		else
398 			_seekFwd( n );
399 		return *this;
400 	}
401 	//--------------------------------------------------------------------------
operator *() const402 	UString::value_type& UString::_rev_iterator::operator*() const
403 	{
404 		return mIter[-1];
405 	}
406 	//--------------------------------------------------------------------------
operator [](difference_type n) const407 	UString::value_type& UString::_rev_iterator::operator[]( difference_type n ) const
408 	{
409 		_rev_iterator tmp( *this );
410 		tmp -= n;
411 		return *tmp;
412 	}
413 	//--------------------------------------------------------------------------
414 	//--------------------------------------------------------------------------
415 	//--------------------------------------------------------------------------
416 	//--------------------------------------------------------------------------
417 	UString::_const_rev_iterator::_const_rev_iterator() = default;
418 	//--------------------------------------------------------------------------
_const_rev_iterator(const _const_rev_iterator & i)419 	UString::_const_rev_iterator::_const_rev_iterator( const _const_rev_iterator& i )
420 	{
421 		_become( i );
422 	}
423 	//--------------------------------------------------------------------------
_const_rev_iterator(const _rev_iterator & i)424 	UString::_const_rev_iterator::_const_rev_iterator( const _rev_iterator& i )
425 	{
426 		_become( i );
427 	}
428 	//--------------------------------------------------------------------------
operator ++()429 	UString::_const_rev_iterator& UString::_const_rev_iterator::operator++()
430 	{
431 		_seekRev( 1 );
432 		return *this;
433 	}
434 	//--------------------------------------------------------------------------
operator ++(int)435 	UString::_const_rev_iterator UString::_const_rev_iterator::operator++( int )
436 	{
437 		_const_rev_iterator tmp( *this );
438 		_seekRev( 1 );
439 		return tmp;
440 	}
441 	//--------------------------------------------------------------------------
operator --()442 	UString::_const_rev_iterator& UString::_const_rev_iterator::operator--()
443 	{
444 		_seekFwd( 1 );
445 		return *this;
446 	}
447 	//--------------------------------------------------------------------------
operator --(int)448 	UString::_const_rev_iterator UString::_const_rev_iterator::operator--( int )
449 	{
450 		_const_rev_iterator tmp( *this );
451 		_seekFwd( 1 );
452 		return tmp;
453 	}
454 	//--------------------------------------------------------------------------
operator +(difference_type n)455 	UString::_const_rev_iterator UString::_const_rev_iterator::operator+( difference_type n )
456 	{
457 		_const_rev_iterator tmp( *this );
458 		if ( n < 0 )
459 			tmp._seekFwd( -n );
460 		else
461 			tmp._seekRev( n );
462 		return tmp;
463 	}
464 	//--------------------------------------------------------------------------
operator -(difference_type n)465 	UString::_const_rev_iterator UString::_const_rev_iterator::operator-( difference_type n )
466 	{
467 		_const_rev_iterator tmp( *this );
468 		if ( n < 0 )
469 			tmp._seekRev( -n );
470 		else
471 			tmp._seekFwd( n );
472 		return tmp;
473 	}
474 	//--------------------------------------------------------------------------
operator +=(difference_type n)475 	UString::_const_rev_iterator& UString::_const_rev_iterator::operator+=( difference_type n )
476 	{
477 		if ( n < 0 )
478 			_seekFwd( -n );
479 		else
480 			_seekRev( n );
481 		return *this;
482 	}
483 	//--------------------------------------------------------------------------
operator -=(difference_type n)484 	UString::_const_rev_iterator& UString::_const_rev_iterator::operator-=( difference_type n )
485 	{
486 		if ( n < 0 )
487 			_seekRev( -n );
488 		else
489 			_seekFwd( n );
490 		return *this;
491 	}
492 	//--------------------------------------------------------------------------
operator *() const493 	const UString::value_type& UString::_const_rev_iterator::operator*() const
494 	{
495 		return mIter[-1];
496 	}
497 	//--------------------------------------------------------------------------
operator [](difference_type n) const498 	const UString::value_type& UString::_const_rev_iterator::operator[]( difference_type n ) const
499 	{
500 		_const_rev_iterator tmp( *this );
501 		tmp -= n;
502 		return *tmp;
503 	}
504 	//--------------------------------------------------------------------------
505 	//--------------------------------------------------------------------------
506 	//--------------------------------------------------------------------------
507 	//--------------------------------------------------------------------------
UString()508 	UString::UString()
509 	{
510 		_init();
511 	}
512 	//--------------------------------------------------------------------------
// Copy constructor: runs _init(), then copies the code_point data of the source string.
// Only mData is copied here; m_buffer is not touched by this constructor body.
UString::UString( const UString& copy )
{
	this->_init();
	this->mData = copy.mData;
}
518 	//--------------------------------------------------------------------------
UString(size_type length,const code_point & ch)519 	UString::UString( size_type length, const code_point& ch )
520 	{
521 		_init();
522 		assign( length, ch );
523 	}
524 	//--------------------------------------------------------------------------
UString(const code_point * str)525 	UString::UString( const code_point* str )
526 	{
527 		_init();
528 		assign( str );
529 	}
530 	//--------------------------------------------------------------------------
UString(const code_point * str,size_type length)531 	UString::UString( const code_point* str, size_type length )
532 	{
533 		_init();
534 		assign( str, length );
535 	}
536 	//--------------------------------------------------------------------------
// Constructs a string from a sub-range of str through assign( str, index, length ).
UString::UString( const UString& str, size_type index, size_type length )
{
	this->_init();
	this->assign( str, index, length );
}
542 	//--------------------------------------------------------------------------
543 #if MYGUI_IS_NATIVE_WCHAR_T
UString(const wchar_t * w_str)544 	UString::UString( const wchar_t* w_str )
545 	{
546 		_init();
547 		assign( w_str );
548 	}
549 	//--------------------------------------------------------------------------
UString(const wchar_t * w_str,size_type length)550 	UString::UString( const wchar_t* w_str, size_type length )
551 	{
552 		_init();
553 		assign( w_str, length );
554 	}
555 #endif
556 	//--------------------------------------------------------------------------
UString(const std::wstring & wstr)557 	UString::UString( const std::wstring& wstr )
558 	{
559 		_init();
560 		assign( wstr );
561 	}
562 	//--------------------------------------------------------------------------
UString(const char * c_str)563 	UString::UString( const char* c_str )
564 	{
565 		_init();
566 		assign( c_str );
567 	}
568 	//--------------------------------------------------------------------------
UString(const char * c_str,size_type length)569 	UString::UString( const char* c_str, size_type length )
570 	{
571 		_init();
572 		assign( c_str, length );
573 	}
574 	//--------------------------------------------------------------------------
UString(const std::string & str)575 	UString::UString( const std::string& str )
576 	{
577 		_init();
578 		assign( str );
579 	}
580 	//--------------------------------------------------------------------------
~UString()581 	UString::~UString()
582 	{
583 		_cleanBuffer();
584 	}
585 	//--------------------------------------------------------------------------
size() const586 	UString::size_type UString::size() const
587 	{
588 		return mData.size();
589 	}
590 	//--------------------------------------------------------------------------
length() const591 	UString::size_type UString::length() const
592 	{
593 		return size();
594 	}
595 	//--------------------------------------------------------------------------
length_Characters() const596 	UString::size_type UString::length_Characters() const
597 	{
598 		const_iterator i = begin(), ie = end();
599 		size_type c = 0;
600 		while ( i != ie ) {
601 			i.moveNext();
602 			++c;
603 		}
604 		return c;
605 	}
606 	//--------------------------------------------------------------------------
max_size() const607 	UString::size_type UString::max_size() const
608 	{
609 		return mData.max_size();
610 	}
611 	//--------------------------------------------------------------------------
reserve(size_type size)612 	void UString::reserve( size_type size )
613 	{
614 		mData.reserve( size );
615 	}
616 	//--------------------------------------------------------------------------
resize(size_type num,const code_point & val)617 	void UString::resize( size_type num, const code_point& val /*= 0 */ )
618 	{
619 		mData.resize( num, val );
620 	}
621 	//--------------------------------------------------------------------------
swap(UString & from)622 	void UString::swap( UString& from )
623 	{
624 		mData.swap( from.mData );
625 	}
626 	//--------------------------------------------------------------------------
empty() const627 	bool UString::empty() const
628 	{
629 		return mData.empty();
630 	}
631 	//--------------------------------------------------------------------------
c_str() const632 	const UString::code_point* UString::c_str() const
633 	{
634 		return mData.c_str();
635 	}
636 	//--------------------------------------------------------------------------
data() const637 	const UString::code_point* UString::data() const
638 	{
639 		return c_str();
640 	}
641 	//--------------------------------------------------------------------------
capacity() const642 	UString::size_type UString::capacity() const
643 	{
644 		return mData.capacity();
645 	}
646 	//--------------------------------------------------------------------------
clear()647 	void UString::clear()
648 	{
649 		mData.clear();
650 	}
651 	//--------------------------------------------------------------------------
substr(size_type index,size_type num) const652 	UString UString::substr( size_type index, size_type num /*= npos */ ) const
653 	{
654 		// this could avoid the extra copy if we used a private specialty constructor
655 		dstring data = mData.substr( index, num );
656 		UString tmp;
657 		tmp.mData.swap( data );
658 		return tmp;
659 	}
660 	//--------------------------------------------------------------------------
push_back(unicode_char val)661 	void UString::push_back( unicode_char val )
662 	{
663 		code_point cp[2];
664 		size_t c = _utf32_to_utf16( val, cp );
665 		if ( c > 0 ) push_back( cp[0] );
666 		if ( c > 1 ) push_back( cp[1] );
667 	}
668 	//--------------------------------------------------------------------------
669 #if MYGUI_IS_NATIVE_WCHAR_T
push_back(wchar_t val)670 	void UString::push_back( wchar_t val )
671 	{
672 		// we do this because the Unicode method still preserves UTF-16 code points
673 		mData.push_back( static_cast<code_point>( val ) );
674 	}
675 #endif
676 	//--------------------------------------------------------------------------
push_back(code_point val)677 	void UString::push_back( code_point val )
678 	{
679 		mData.push_back( val );
680 	}
681 
push_back(char val)682 	void UString::push_back( char val )
683 	{
684 		mData.push_back( static_cast<code_point>( val ) );
685 	}
686 
inString(unicode_char ch) const687 	bool UString::inString( unicode_char ch ) const
688 	{
689 		const_iterator i, ie = end();
690 		for ( i = begin(); i != ie; i.moveNext() ) {
691 			if ( i.getCharacter() == ch )
692 				return true;
693 		}
694 		return false;
695 	}
696 
asUTF8() const697 	const std::string& UString::asUTF8() const
698 	{
699 		_load_buffer_UTF8();
700 		return *m_buffer.mStrBuffer;
701 	}
702 
asUTF8_c_str() const703 	const char* UString::asUTF8_c_str() const
704 	{
705 		_load_buffer_UTF8();
706 		return m_buffer.mStrBuffer->c_str();
707 	}
708 
asUTF32() const709 	const UString::utf32string& UString::asUTF32() const
710 	{
711 		_load_buffer_UTF32();
712 		return *m_buffer.mUTF32StrBuffer;
713 	}
714 
asUTF32_c_str() const715 	const UString::unicode_char* UString::asUTF32_c_str() const
716 	{
717 		_load_buffer_UTF32();
718 		return m_buffer.mUTF32StrBuffer->c_str();
719 	}
720 
asWStr() const721 	const std::wstring& UString::asWStr() const
722 	{
723 		_load_buffer_WStr();
724 		return *m_buffer.mWStrBuffer;
725 	}
726 
asWStr_c_str() const727 	const wchar_t* UString::asWStr_c_str() const
728 	{
729 		_load_buffer_WStr();
730 		return m_buffer.mWStrBuffer->c_str();
731 	}
732 
at(size_type loc)733 	UString::code_point& UString::at( size_type loc )
734 	{
735 		return mData.at( loc );
736 	}
737 
at(size_type loc) const738 	const UString::code_point& UString::at( size_type loc ) const
739 	{
740 		return mData.at( loc );
741 	}
742 
getChar(size_type loc) const743 	UString::unicode_char UString::getChar( size_type loc ) const
744 	{
745 		const code_point* ptr = c_str();
746 		unicode_char uc;
747 		size_t l = _utf16_char_length( ptr[loc] );
748 		code_point cp[2] = { /* blame the code beautifier */
749 			0, 0
750 		};
751 		cp[0] = ptr[loc];
752 
753 		if ( l == 2 && ( loc + 1 ) < mData.length() ) {
754 			cp[1] = ptr[loc+1];
755 		}
756 		_utf16_to_utf32( cp, uc );
757 		return uc;
758 	}
759 
setChar(size_type loc,unicode_char ch)760 	int UString::setChar( size_type loc, unicode_char ch )
761 	{
762 		code_point cp[2] = { /* blame the code beautifier */
763 			0, 0
764 		};
765 		size_t l = _utf32_to_utf16( ch, cp );
766 		unicode_char existingChar = getChar( loc );
767 		size_t existingSize = _utf16_char_length( existingChar );
768 		size_t newSize = _utf16_char_length( ch );
769 
770 		if ( newSize > existingSize ) {
771 			at( loc ) = cp[0];
772 			insert( loc + 1, 1, cp[1] );
773 			return 1;
774 		}
775 		if ( newSize < existingSize ) {
776 			erase( loc, 1 );
777 			at( loc ) = cp[0];
778 			return -1;
779 		}
780 
781 		// newSize == existingSize
782 		at( loc ) = cp[0];
783 		if ( l == 2 ) at( loc + 1 ) = cp[1];
784 		return 0;
785 	}
786 
begin()787 	UString::iterator UString::begin()
788 	{
789 		iterator i;
790 		i.mIter = mData.begin();
791 		i.mString = this;
792 		return i;
793 	}
794 
begin() const795 	UString::const_iterator UString::begin() const
796 	{
797 		const_iterator i;
798 		i.mIter = const_cast<UString*>( this )->mData.begin();
799 		i.mString = const_cast<UString*>( this );
800 		return i;
801 	}
802 
end()803 	UString::iterator UString::end()
804 	{
805 		iterator i;
806 		i.mIter = mData.end();
807 		i.mString = this;
808 		return i;
809 	}
810 
end() const811 	UString::const_iterator UString::end() const
812 	{
813 		const_iterator i;
814 		i.mIter = const_cast<UString*>( this )->mData.end();
815 		i.mString = const_cast<UString*>( this );
816 		return i;
817 	}
818 
rbegin()819 	UString::reverse_iterator UString::rbegin()
820 	{
821 		reverse_iterator i;
822 		i.mIter = mData.end();
823 		i.mString = this;
824 		return i;
825 	}
826 
rbegin() const827 	UString::const_reverse_iterator UString::rbegin() const
828 	{
829 		const_reverse_iterator i;
830 		i.mIter = const_cast<UString*>( this )->mData.end();
831 		i.mString = const_cast<UString*>( this );
832 		return i;
833 	}
834 
rend()835 	UString::reverse_iterator UString::rend()
836 	{
837 		reverse_iterator i;
838 		i.mIter = mData.begin();
839 		i.mString = this;
840 		return i;
841 	}
842 
rend() const843 	UString::const_reverse_iterator UString::rend() const
844 	{
845 		const_reverse_iterator i;
846 		i.mIter = const_cast<UString*>( this )->mData.begin();
847 		i.mString = const_cast<UString*>( this );
848 		return i;
849 	}
850 
assign(iterator start,iterator end)851 	UString& UString::assign( iterator start, iterator end )
852 	{
853 		mData.assign( start.mIter, end.mIter );
854 		return *this;
855 	}
856 
// Replaces the contents with a copy of str's code_points; returns *this.
UString& UString::assign( const UString& str )
{
	this->mData.assign( str.mData );
	return *this;
}
862 
assign(const code_point * str)863 	UString& UString::assign( const code_point* str )
864 	{
865 		mData.assign( str );
866 		return *this;
867 	}
868 
assign(const code_point * str,size_type num)869 	UString& UString::assign( const code_point* str, size_type num )
870 	{
871 		mData.assign( str, num );
872 		return *this;
873 	}
874 
assign(const UString & str,size_type index,size_type len)875 	UString& UString::assign( const UString& str, size_type index, size_type len )
876 	{
877 		mData.assign( str.mData, index, len );
878 		return *this;
879 	}
880 
assign(size_type num,const code_point & ch)881 	UString& UString::assign( size_type num, const code_point& ch )
882 	{
883 		mData.assign( num, ch );
884 		return *this;
885 	}
886 
assign(const std::wstring & wstr)887 	UString& UString::assign( const std::wstring& wstr )
888 	{
889 		mData.clear();
890 		mData.reserve( wstr.length() ); // best guess bulk allocate
891 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
892 		code_point tmp;
893 		std::wstring::const_iterator i, ie = wstr.end();
894 		for ( i = wstr.begin(); i != ie; i++ ) {
895 			tmp = static_cast<code_point>( *i );
896 			mData.push_back( tmp );
897 		}
898 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
899 		code_point cp[3] = {0, 0, 0};
900 		unicode_char tmp;
901 		std::wstring::const_iterator i, ie = wstr.end();
902 		for ( i = wstr.begin(); i != ie; i++ ) {
903 			tmp = static_cast<unicode_char>( *i );
904 			size_t l = _utf32_to_utf16( tmp, cp );
905 			if ( l > 0 ) mData.push_back( cp[0] );
906 			if ( l > 1 ) mData.push_back( cp[1] );
907 		}
908 #endif
909 		return *this;
910 	}
911 
912 #if MYGUI_IS_NATIVE_WCHAR_T
assign(const wchar_t * w_str)913 	UString& UString::assign( const wchar_t* w_str )
914 	{
915 		std::wstring tmp;
916 		tmp.assign( w_str );
917 		return assign( tmp );
918 	}
919 
assign(const wchar_t * w_str,size_type num)920 	UString& UString::assign( const wchar_t* w_str, size_type num )
921 	{
922 		std::wstring tmp;
923 		tmp.assign( w_str, num );
924 		return assign( tmp );
925 	}
926 #endif
927 
assign(const std::string & str)928 	UString& UString::assign( const std::string& str )
929 	{
930 		size_type len = _verifyUTF8( str );
931 		clear(); // empty our contents, if there are any
932 		reserve( len ); // best guess bulk capacity growth
933 
934 		// This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
935 		// then converting it to UTF-16, then finally appending the data buffer
936 
937 		unicode_char uc;          // temporary Unicode character buffer
938 		unsigned char utf8buf[7]; // temporary UTF-8 buffer
939 		utf8buf[6] = 0;
940 		size_t utf8len;           // UTF-8 length
941 		code_point utf16buff[3];  // temporary UTF-16 buffer
942 		utf16buff[2] = 0;
943 		size_t utf16len;          // UTF-16 length
944 
945 		std::string::const_iterator i, ie = str.end();
946 		for ( i = str.begin(); i != ie; i++ ) {
947 			utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
948 			for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
949 				utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
950 			}
951 			utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
952 			utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
953 			i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
954 
955 			utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
956 			append( utf16buff, utf16len ); // append the characters to the string
957 		}
958 		return *this;
959 	}
960 
assign(const char * c_str)961 	UString& UString::assign( const char* c_str )
962 	{
963 		std::string tmp( c_str );
964 		return assign( tmp );
965 	}
966 
assign(const char * c_str,size_type num)967 	UString& UString::assign( const char* c_str, size_type num )
968 	{
969 		std::string tmp;
970 		tmp.assign( c_str, num );
971 		return assign( tmp );
972 	}
973 
append(const UString & str)974 	UString& UString::append( const UString& str )
975 	{
976 		mData.append( str.mData );
977 		return *this;
978 	}
979 
append(const code_point * str)980 	UString& UString::append( const code_point* str )
981 	{
982 		mData.append( str );
983 		return *this;
984 	}
985 
append(const UString & str,size_type index,size_type len)986 	UString& UString::append( const UString& str, size_type index, size_type len )
987 	{
988 		mData.append( str.mData, index, len );
989 		return *this;
990 	}
991 
append(const code_point * str,size_type num)992 	UString& UString::append( const code_point* str, size_type num )
993 	{
994 		mData.append( str, num );
995 		return *this;
996 	}
997 
append(size_type num,code_point ch)998 	UString& UString::append( size_type num, code_point ch )
999 	{
1000 		mData.append( num, ch );
1001 		return *this;
1002 	}
1003 
append(iterator start,iterator end)1004 	UString& UString::append( iterator start, iterator end )
1005 	{
1006 		mData.append( start.mIter, end.mIter );
1007 		return *this;
1008 	}
1009 
1010 #if MYGUI_IS_NATIVE_WCHAR_T
append(const wchar_t * w_str,size_type num)1011 	UString& UString::append( const wchar_t* w_str, size_type num )
1012 	{
1013 		std::wstring tmp( w_str, num );
1014 		return append( tmp );
1015 	}
1016 
append(size_type num,wchar_t ch)1017 	UString& UString::append( size_type num, wchar_t ch )
1018 	{
1019 		return append( num, static_cast<unicode_char>( ch ) );
1020 	}
1021 #endif
append(const char * c_str,size_type num)1022 	UString& UString::append( const char* c_str, size_type num )
1023 	{
1024 		UString tmp( c_str, num );
1025 		append( tmp );
1026 		return *this;
1027 	}
1028 
append(size_type num,char ch)1029 	UString& UString::append( size_type num, char ch )
1030 	{
1031 		append( num, static_cast<code_point>( ch ) );
1032 		return *this;
1033 	}
1034 
append(size_type num,unicode_char ch)1035 	UString& UString::append( size_type num, unicode_char ch )
1036 	{
1037 		code_point cp[2] = {0, 0};
1038 		if ( _utf32_to_utf16( ch, cp ) == 2 ) {
1039 			for ( size_type i = 0; i < num; i++ ) {
1040 				append( 1, cp[0] );
1041 				append( 1, cp[1] );
1042 			}
1043 		} else {
1044 			for ( size_type i = 0; i < num; i++ ) {
1045 				append( 1, cp[0] );
1046 			}
1047 		}
1048 		return *this;
1049 	}
1050 
insert(iterator i,const code_point & ch)1051 	UString::iterator UString::insert( iterator i, const code_point& ch )
1052 	{
1053 		iterator ret;
1054 		ret.mIter = mData.insert( i.mIter, ch );
1055 		ret.mString = this;
1056 		return ret;
1057 	}
1058 
insert(size_type index,const UString & str)1059 	UString& UString::insert( size_type index, const UString& str )
1060 	{
1061 		mData.insert( index, str.mData );
1062 		return *this;
1063 	}
1064 
insert(size_type index1,const UString & str,size_type index2,size_type num)1065 	UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num )
1066 	{
1067 		mData.insert( index1, str.mData, index2, num );
1068 		return *this;
1069 	}
1070 
insert(iterator i,iterator start,iterator end)1071 	void UString::insert( iterator i, iterator start, iterator end )
1072 	{
1073 		mData.insert( i.mIter, start.mIter, end.mIter );
1074 	}
1075 
insert(size_type index,const code_point * str,size_type num)1076 	UString& UString::insert( size_type index, const code_point* str, size_type num )
1077 	{
1078 		mData.insert( index, str, num );
1079 		return *this;
1080 	}
1081 
1082 #if MYGUI_IS_NATIVE_WCHAR_T
insert(size_type index,const wchar_t * w_str,size_type num)1083 	UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
1084 	{
1085 		UString tmp( w_str, num );
1086 		insert( index, tmp );
1087 		return *this;
1088 	}
1089 #endif
1090 
insert(size_type index,const char * c_str,size_type num)1091 	UString& UString::insert( size_type index, const char* c_str, size_type num )
1092 	{
1093 		UString tmp( c_str, num );
1094 		insert( index, tmp );
1095 		return *this;
1096 	}
1097 
insert(size_type index,size_type num,code_point ch)1098 	UString& UString::insert( size_type index, size_type num, code_point ch )
1099 	{
1100 		mData.insert( index, num, ch );
1101 		return *this;
1102 	}
1103 
1104 #if MYGUI_IS_NATIVE_WCHAR_T
insert(size_type index,size_type num,wchar_t ch)1105 	UString& UString::insert( size_type index, size_type num, wchar_t ch )
1106 	{
1107 		insert( index, num, static_cast<unicode_char>( ch ) );
1108 		return *this;
1109 	}
1110 #endif
1111 
insert(size_type index,size_type num,char ch)1112 	UString& UString::insert( size_type index, size_type num, char ch )
1113 	{
1114 		insert( index, num, static_cast<code_point>( ch ) );
1115 		return *this;
1116 	}
1117 
insert(size_type index,size_type num,unicode_char ch)1118 	UString& UString::insert( size_type index, size_type num, unicode_char ch )
1119 	{
1120 		code_point cp[3] = {0, 0, 0};
1121 		size_t l = _utf32_to_utf16( ch, cp );
1122 		if ( l == 1 ) {
1123 			return insert( index, num, cp[0] );
1124 		}
1125 		for ( size_type c = 0; c < num; c++ ) {
1126 			// insert in reverse order to preserve ordering after insert
1127 			insert( index, 1, cp[1] );
1128 			insert( index, 1, cp[0] );
1129 		}
1130 		return *this;
1131 	}
1132 
insert(iterator i,size_type num,const code_point & ch)1133 	void UString::insert( iterator i, size_type num, const code_point& ch )
1134 	{
1135 		mData.insert( i.mIter, num, ch );
1136 	}
1137 #if MYGUI_IS_NATIVE_WCHAR_T
insert(iterator i,size_type num,const wchar_t & ch)1138 	void UString::insert( iterator i, size_type num, const wchar_t& ch )
1139 	{
1140 		insert( i, num, static_cast<unicode_char>( ch ) );
1141 	}
1142 #endif
1143 
insert(iterator i,size_type num,const char & ch)1144 	void UString::insert( iterator i, size_type num, const char& ch )
1145 	{
1146 		insert( i, num, static_cast<code_point>( ch ) );
1147 	}
1148 
insert(iterator i,size_type num,const unicode_char & ch)1149 	void UString::insert( iterator i, size_type num, const unicode_char& ch )
1150 	{
1151 		code_point cp[3] = {0, 0, 0};
1152 		size_t l = _utf32_to_utf16( ch, cp );
1153 		if ( l == 1 ) {
1154 			insert( i, num, cp[0] );
1155 		} else {
1156 			for ( size_type c = 0; c < num; c++ ) {
1157 				// insert in reverse order to preserve ordering after insert
1158 				insert( i, 1, cp[1] );
1159 				insert( i, 1, cp[0] );
1160 			}
1161 		}
1162 	}
1163 
erase(iterator loc)1164 	UString::iterator UString::erase( iterator loc )
1165 	{
1166 		iterator ret;
1167 		ret.mIter = mData.erase( loc.mIter );
1168 		ret.mString = this;
1169 		return ret;
1170 	}
1171 
erase(iterator start,iterator end)1172 	UString::iterator UString::erase( iterator start, iterator end )
1173 	{
1174 		iterator ret;
1175 		ret.mIter = mData.erase( start.mIter, end.mIter );
1176 		ret.mString = this;
1177 		return ret;
1178 	}
1179 
erase(size_type index,size_type num)1180 	UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ )
1181 	{
1182 		if ( num == npos )
1183 			mData.erase( index );
1184 		else
1185 			mData.erase( index, num );
1186 		return *this;
1187 	}
1188 
replace(size_type index1,size_type num1,const UString & str)1189 	UString& UString::replace( size_type index1, size_type num1, const UString& str )
1190 	{
1191 		mData.replace( index1, num1, str.mData, 0, npos );
1192 		return *this;
1193 	}
1194 
replace(size_type index1,size_type num1,const UString & str,size_type num2)1195 	UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type num2 )
1196 	{
1197 		mData.replace( index1, num1, str.mData, 0, num2 );
1198 		return *this;
1199 	}
1200 
replace(size_type index1,size_type num1,const UString & str,size_type index2,size_type num2)1201 	UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
1202 	{
1203 		mData.replace( index1, num1, str.mData, index2, num2 );
1204 		return *this;
1205 	}
1206 
replace(iterator start,iterator end,const UString & str,size_type num)1207 	UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ )
1208 	{
1209 		_const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1210 
1211 		size_type index1 = begin() - st;
1212 		size_type num1 = end - st;
1213 		return replace( index1, num1, str, 0, num );
1214 	}
1215 
replace(size_type index,size_type num1,size_type num2,code_point ch)1216 	UString& UString::replace( size_type index, size_type num1, size_type num2, code_point ch )
1217 	{
1218 		mData.replace( index, num1, num2, ch );
1219 		return *this;
1220 	}
1221 
replace(iterator start,iterator end,size_type num,code_point ch)1222 	UString& UString::replace( iterator start, iterator end, size_type num, code_point ch )
1223 	{
1224 		_const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1225 
1226 		size_type index1 = begin() - st;
1227 		size_type num1 = end - st;
1228 		return replace( index1, num1, num, ch );
1229 	}
1230 
compare(const UString & str) const1231 	int UString::compare( const UString& str ) const
1232 	{
1233 		return mData.compare( str.mData );
1234 	}
1235 
compare(const code_point * str) const1236 	int UString::compare( const code_point* str ) const
1237 	{
1238 		return mData.compare( str );
1239 	}
1240 
compare(size_type index,size_type length,const UString & str) const1241 	int UString::compare( size_type index, size_type length, const UString& str ) const
1242 	{
1243 		return mData.compare( index, length, str.mData );
1244 	}
1245 
compare(size_type index,size_type length,const UString & str,size_type index2,size_type length2) const1246 	int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
1247 	{
1248 		return mData.compare( index, length, str.mData, index2, length2 );
1249 	}
1250 
compare(size_type index,size_type length,const code_point * str,size_type length2) const1251 	int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
1252 	{
1253 		return mData.compare( index, length, str, length2 );
1254 	}
1255 
1256 #if MYGUI_IS_NATIVE_WCHAR_T
compare(size_type index,size_type length,const wchar_t * w_str,size_type length2) const1257 	int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
1258 	{
1259 		UString tmp( w_str, length2 );
1260 		return compare( index, length, tmp );
1261 	}
1262 #endif
1263 
compare(size_type index,size_type length,const char * c_str,size_type length2) const1264 	int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
1265 	{
1266 		UString tmp( c_str, length2 );
1267 		return compare( index, length, tmp );
1268 	}
1269 
find(const UString & str,size_type index) const1270 	UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const
1271 	{
1272 		return mData.find( str.c_str(), index );
1273 	}
1274 
find(const code_point * cp_str,size_type index,size_type length) const1275 	UString::size_type UString::find( const code_point* cp_str, size_type index, size_type length ) const
1276 	{
1277 		UString tmp( cp_str );
1278 		return mData.find( tmp.c_str(), index, length );
1279 	}
1280 
find(const char * c_str,size_type index,size_type length) const1281 	UString::size_type UString::find( const char* c_str, size_type index, size_type length ) const
1282 	{
1283 		UString tmp( c_str );
1284 		return mData.find( tmp.c_str(), index, length );
1285 	}
1286 
1287 #if MYGUI_IS_NATIVE_WCHAR_T
find(const wchar_t * w_str,size_type index,size_type length) const1288 	UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
1289 	{
1290 		UString tmp( w_str );
1291 		return mData.find( tmp.c_str(), index, length );
1292 	}
1293 #endif
1294 
find(char ch,size_type index) const1295 	UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const
1296 	{
1297 		return find( static_cast<code_point>( ch ), index );
1298 	}
1299 
find(code_point ch,size_type index) const1300 	UString::size_type UString::find( code_point ch, size_type index /*= 0 */ ) const
1301 	{
1302 		return mData.find( ch, index );
1303 	}
1304 
1305 #if MYGUI_IS_NATIVE_WCHAR_T
find(wchar_t ch,size_type index) const1306 	UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const
1307 	{
1308 		return find( static_cast<unicode_char>( ch ), index );
1309 	}
1310 #endif
1311 
find(unicode_char ch,size_type index) const1312 	UString::size_type UString::find( unicode_char ch, size_type index /*= 0 */ ) const
1313 	{
1314 		code_point cp[3] = {0, 0, 0};
1315 		size_t l = _utf32_to_utf16( ch, cp );
1316 		return find( UString( cp, l ), index );
1317 	}
1318 
rfind(const UString & str,size_type index) const1319 	UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const
1320 	{
1321 		return mData.rfind( str.c_str(), index );
1322 	}
1323 
rfind(const code_point * cp_str,size_type index,size_type num) const1324 	UString::size_type UString::rfind( const code_point* cp_str, size_type index, size_type num ) const
1325 	{
1326 		UString tmp( cp_str );
1327 		return mData.rfind( tmp.c_str(), index, num );
1328 	}
1329 
rfind(const char * c_str,size_type index,size_type num) const1330 	UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const
1331 	{
1332 		UString tmp( c_str );
1333 		return mData.rfind( tmp.c_str(), index, num );
1334 	}
1335 
1336 #if MYGUI_IS_NATIVE_WCHAR_T
rfind(const wchar_t * w_str,size_type index,size_type num) const1337 	UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
1338 	{
1339 		UString tmp( w_str );
1340 		return mData.rfind( tmp.c_str(), index, num );
1341 	}
1342 #endif
1343 
rfind(char ch,size_type index) const1344 	UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const
1345 	{
1346 		return rfind( static_cast<code_point>( ch ), index );
1347 	}
1348 
rfind(code_point ch,size_type index) const1349 	UString::size_type UString::rfind( code_point ch, size_type index ) const
1350 	{
1351 		return mData.rfind( ch, index );
1352 	}
1353 
1354 #if MYGUI_IS_NATIVE_WCHAR_T
rfind(wchar_t ch,size_type index) const1355 	UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const
1356 	{
1357 		return rfind( static_cast<unicode_char>( ch ), index );
1358 	}
1359 #endif
1360 
rfind(unicode_char ch,size_type index) const1361 	UString::size_type UString::rfind( unicode_char ch, size_type index /*= 0 */ ) const
1362 	{
1363 		code_point cp[3] = {0, 0, 0};
1364 		size_t l = _utf32_to_utf16( ch, cp );
1365 		return rfind( UString( cp, l ), index );
1366 	}
1367 
find_first_of(const UString & str,size_type index,size_type num) const1368 	UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1369 	{
1370 		size_type i = 0;
1371 		const size_type len = length();
1372 		while ( i < num && ( index + i ) < len ) {
1373 			unicode_char ch = getChar( index + i );
1374 			if ( str.inString( ch ) )
1375 				return index + i;
1376 			i += _utf16_char_length( ch ); // increment by the Unicode character length
1377 		}
1378 		return npos;
1379 	}
1380 
find_first_of(code_point ch,size_type index) const1381 	UString::size_type UString::find_first_of( code_point ch, size_type index /*= 0 */ ) const
1382 	{
1383 		UString tmp;
1384 		tmp.assign( 1, ch );
1385 		return find_first_of( tmp, index );
1386 	}
1387 
find_first_of(char ch,size_type index) const1388 	UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const
1389 	{
1390 		return find_first_of( static_cast<code_point>( ch ), index );
1391 	}
1392 
1393 #if MYGUI_IS_NATIVE_WCHAR_T
find_first_of(wchar_t ch,size_type index) const1394 	UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const
1395 	{
1396 		return find_first_of( static_cast<unicode_char>( ch ), index );
1397 	}
1398 #endif
1399 
find_first_of(unicode_char ch,size_type index) const1400 	UString::size_type UString::find_first_of( unicode_char ch, size_type index /*= 0 */ ) const
1401 	{
1402 		code_point cp[3] = {0, 0, 0};
1403 		size_t l = _utf32_to_utf16( ch, cp );
1404 		return find_first_of( UString( cp, l ), index );
1405 	}
1406 
find_first_not_of(const UString & str,size_type index,size_type num) const1407 	UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1408 	{
1409 		size_type i = 0;
1410 		const size_type len = length();
1411 		while ( i < num && ( index + i ) < len ) {
1412 			unicode_char ch = getChar( index + i );
1413 			if ( !str.inString( ch ) )
1414 				return index + i;
1415 			i += _utf16_char_length( ch ); // increment by the Unicode character length
1416 		}
1417 		return npos;
1418 	}
1419 
find_first_not_of(code_point ch,size_type index) const1420 	UString::size_type UString::find_first_not_of( code_point ch, size_type index /*= 0 */ ) const
1421 	{
1422 		UString tmp;
1423 		tmp.assign( 1, ch );
1424 		return find_first_not_of( tmp, index );
1425 	}
1426 
find_first_not_of(char ch,size_type index) const1427 	UString::size_type UString::find_first_not_of( char ch, size_type index /*= 0 */ ) const
1428 	{
1429 		return find_first_not_of( static_cast<code_point>( ch ), index );
1430 	}
1431 
1432 #if MYGUI_IS_NATIVE_WCHAR_T
find_first_not_of(wchar_t ch,size_type index) const1433 	UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const
1434 	{
1435 		return find_first_not_of( static_cast<unicode_char>( ch ), index );
1436 	}
1437 #endif
1438 
find_first_not_of(unicode_char ch,size_type index) const1439 	UString::size_type UString::find_first_not_of( unicode_char ch, size_type index /*= 0 */ ) const
1440 	{
1441 		code_point cp[3] = {0, 0, 0};
1442 		size_t l = _utf32_to_utf16( ch, cp );
1443 		return find_first_not_of( UString( cp, l ), index );
1444 	}
1445 
find_last_of(const UString & str,size_type index,size_type num) const1446 	UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1447 	{
1448 		size_type i = 0;
1449 		const size_type len = length();
1450 		if ( index > len ) index = len - 1;
1451 
1452 		while ( i < num && ( index - i ) != npos ) {
1453 			size_type j = index - i;
1454 			// careful to step full Unicode characters
1455 			if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1456 				j = index - ++i;
1457 			}
1458 			// and back to the usual dull test
1459 			unicode_char ch = getChar( j );
1460 			if ( str.inString( ch ) )
1461 				return j;
1462 			i++;
1463 		}
1464 		return npos;
1465 	}
1466 
find_last_of(code_point ch,size_type index) const1467 	UString::size_type UString::find_last_of( code_point ch, size_type index /*= npos */ ) const
1468 	{
1469 		UString tmp;
1470 		tmp.assign( 1, ch );
1471 		return find_last_of( tmp, index );
1472 	}
1473 
1474 #if MYGUI_IS_NATIVE_WCHAR_T
find_last_of(wchar_t ch,size_type index) const1475 	UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const
1476 	{
1477 		return find_last_of( static_cast<unicode_char>( ch ), index );
1478 	}
1479 #endif
1480 
find_last_of(unicode_char ch,size_type index) const1481 	UString::size_type UString::find_last_of( unicode_char ch, size_type index /*= npos */ ) const
1482 	{
1483 		code_point cp[3] = {0, 0, 0};
1484 		size_t l = _utf32_to_utf16( ch, cp );
1485 		return find_last_of( UString( cp, l ), index );
1486 	}
1487 
find_last_not_of(const UString & str,size_type index,size_type num) const1488 	UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1489 	{
1490 		size_type i = 0;
1491 		const size_type len = length();
1492 		if ( index > len ) index = len - 1;
1493 
1494 		while ( i < num && ( index - i ) != npos ) {
1495 			size_type j = index - i;
1496 			// careful to step full Unicode characters
1497 			if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1498 				j = index - ++i;
1499 			}
1500 			// and back to the usual dull test
1501 			unicode_char ch = getChar( j );
1502 			if ( !str.inString( ch ) )
1503 				return j;
1504 			i++;
1505 		}
1506 		return npos;
1507 	}
1508 
find_last_not_of(code_point ch,size_type index) const1509 	UString::size_type UString::find_last_not_of( code_point ch, size_type index /*= npos */ ) const
1510 	{
1511 		UString tmp;
1512 		tmp.assign( 1, ch );
1513 		return find_last_not_of( tmp, index );
1514 	}
1515 
find_last_not_of(char ch,size_type index) const1516 	UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const
1517 	{
1518 		return find_last_not_of( static_cast<code_point>( ch ), index );
1519 	}
1520 
1521 #if MYGUI_IS_NATIVE_WCHAR_T
find_last_not_of(wchar_t ch,size_type index) const1522 	UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const
1523 	{
1524 		return find_last_not_of( static_cast<unicode_char>( ch ), index );
1525 	}
1526 #endif
1527 
find_last_not_of(unicode_char ch,size_type index) const1528 	UString::size_type UString::find_last_not_of( unicode_char ch, size_type index /*= npos */ ) const
1529 	{
1530 		code_point cp[3] = {0, 0, 0};
1531 		size_t l = _utf32_to_utf16( ch, cp );
1532 		return find_last_not_of( UString( cp, l ), index );
1533 	}
1534 
operator <(const UString & right) const1535 	bool UString::operator<( const UString& right ) const
1536 	{
1537 		return compare( right ) < 0;
1538 	}
1539 
operator <=(const UString & right) const1540 	bool UString::operator<=( const UString& right ) const
1541 	{
1542 		return compare( right ) <= 0;
1543 	}
1544 
operator =(const UString & s)1545 	UString& UString::operator=( const UString& s )
1546 	{
1547 		return assign( s );
1548 	}
1549 
operator =(code_point ch)1550 	UString& UString::operator=( code_point ch )
1551 	{
1552 		clear();
1553 		return append( 1, ch );
1554 	}
1555 
operator =(char ch)1556 	UString& UString::operator=( char ch )
1557 	{
1558 		clear();
1559 		return append( 1, ch );
1560 	}
1561 
1562 #if MYGUI_IS_NATIVE_WCHAR_T
operator =(wchar_t ch)1563 	UString& UString::operator=( wchar_t ch )
1564 	{
1565 		clear();
1566 		return append( 1, ch );
1567 	}
1568 #endif
1569 
operator =(unicode_char ch)1570 	UString& UString::operator=( unicode_char ch )
1571 	{
1572 		clear();
1573 		return append( 1, ch );
1574 	}
1575 
operator >(const UString & right) const1576 	bool UString::operator>( const UString& right ) const
1577 	{
1578 		return compare( right ) > 0;
1579 	}
1580 
operator >=(const UString & right) const1581 	bool UString::operator>=( const UString& right ) const
1582 	{
1583 		return compare( right ) >= 0;
1584 	}
1585 
operator ==(const UString & right) const1586 	bool UString::operator==( const UString& right ) const
1587 	{
1588 		return compare( right ) == 0;
1589 	}
1590 
operator !=(const UString & right) const1591 	bool UString::operator!=( const UString& right ) const
1592 	{
1593 		return !operator==( right );
1594 	}
1595 
operator [](size_type index)1596 	UString::code_point& UString::operator[]( size_type index )
1597 	{
1598 		return at( index );
1599 	}
1600 
operator [](size_type index) const1601 	const UString::code_point& UString::operator[]( size_type index ) const
1602 	{
1603 		return at( index );
1604 	}
1605 
operator std::string() const1606 	UString::operator std::string() const
1607 	{
1608 		return std::string( asUTF8() );
1609 	}
1610 
1611 	//! implicit cast to std::wstring
operator std::wstring() const1612 	UString::operator std::wstring() const
1613 	{
1614 		return std::wstring( asWStr() );
1615 	}
1616 
1617 
_utf16_independent_char(code_point cp)1618 	bool UString::_utf16_independent_char( code_point cp )
1619 	{
1620 		if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
1621 			return false; // it matches a surrogate pair signature
1622 		return true; // everything else is a standalone code point
1623 	}
1624 
_utf16_surrogate_lead(code_point cp)1625 	bool UString::_utf16_surrogate_lead( code_point cp )
1626 	{
1627 		if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
1628 			return true; // it is a 1st word
1629 		return false; // it isn't
1630 	}
1631 
_utf16_surrogate_follow(code_point cp)1632 	bool UString::_utf16_surrogate_follow( code_point cp )
1633 	{
1634 		if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
1635 			return true; // it is a 2nd word
1636 		return false; // everything else isn't
1637 	}
1638 
_utf16_char_length(code_point cp)1639 	size_t UString::_utf16_char_length( code_point cp )
1640 	{
1641 		if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
1642 			return 2; // if it is, then we are 2 words long
1643 		return 1; // otherwise we are only 1 word long
1644 	}
1645 
_utf16_char_length(unicode_char uc)1646 	size_t UString::_utf16_char_length( unicode_char uc )
1647 	{
1648 		if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
1649 			return 2; // if so, we need a surrogate pair
1650 		return 1; // otherwise we can stuff it into a single word
1651 	}
1652 
_utf16_to_utf32(const code_point in_cp[2],unicode_char & out_uc)1653 	size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
1654 	{
1655 		const code_point& cp1 = in_cp[0];
1656 		const code_point& cp2 = in_cp[1];
1657 		bool wordPair = false;
1658 
1659 		// does it look like a surrogate pair?
1660 		if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1661 			// looks like one, but does the other half match the algorithm as well?
1662 			if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1663 				wordPair = true; // yep!
1664 		}
1665 
1666 		if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
1667 			out_uc = cp1;
1668 			return 1;
1669 		}
1670 
1671 		unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
1672 		cU -= 0xD800; // remove the encoding markers
1673 		cL -= 0xDC00;
1674 
1675 		out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
1676 		out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
1677 		out_uc += 0x10000; // add back in the value offset
1678 
1679 		return 2; // this whole operation takes to words, so that's what we'll return
1680 	}
1681 
_utf32_to_utf16(const unicode_char & in_uc,code_point out_cp[2])1682 	size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
1683 	{
1684 		if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
1685 			out_cp[0] = static_cast<code_point>(in_uc);
1686 			return 1;
1687 		}
1688 		unicode_char uc = in_uc; // copy to writable buffer
1689 		unsigned short tmp; // single code point buffer
1690 		uc -= 0x10000; // subtract value offset
1691 
1692 		//process upper word
1693 		tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits
1694 		tmp += 0xD800; // add encoding offset
1695 		out_cp[0] = tmp; // write
1696 
1697 		// process lower word
1698 		tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
1699 		tmp += 0xDC00; // add encoding offset
1700 		out_cp[1] = tmp; // write
1701 
1702 		return 2; // return used word count (2 for surrogate pairs)
1703 	}
1704 
_utf8_start_char(unsigned char cp)1705 	bool UString::_utf8_start_char( unsigned char cp )
1706 	{
1707 		return ( cp & ~_cont_mask ) != _cont;
1708 	}
1709 
_utf8_char_length(unsigned char cp)1710 	size_t UString::_utf8_char_length( unsigned char cp )
1711 	{
1712 		if ( !( cp & 0x80 ) ) return 1;
1713 		if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
1714 		if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
1715 		if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
1716 		if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
1717 		if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
1718 
1719 		return 1;
1720 		//throw invalid_data( "invalid UTF-8 sequence header value" );
1721 	}
1722 
_utf8_char_length(unicode_char uc)1723 	size_t UString::_utf8_char_length( unicode_char uc )
1724 	{
1725 		/*
1726 		7 bit:  U-00000000 - U-0000007F: 0xxxxxxx
1727 		11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
1728 		16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
1729 		21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1730 		26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1731 		31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1732 		*/
1733 		if ( !( uc & ~0x0000007F ) ) return 1;
1734 		if ( !( uc & ~0x000007FF ) ) return 2;
1735 		if ( !( uc & ~0x0000FFFF ) ) return 3;
1736 		if ( !( uc & ~0x001FFFFF ) ) return 4;
1737 		if ( !( uc & ~0x03FFFFFF ) ) return 5;
1738 		if ( !( uc & ~0x7FFFFFFF ) ) return 6;
1739 
1740 		return 1;
1741 		//throw invalid_data( "invalid UTF-32 value" );
1742 	}
1743 
_utf8_to_utf32(const unsigned char in_cp[6],unicode_char & out_uc)1744 	size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
1745 	{
1746 		size_t len = _utf8_char_length( in_cp[0] );
1747 		if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
1748 			out_uc = in_cp[0];
1749 			return 1;
1750 		}
1751 
1752 		unicode_char c = 0; // temporary buffer
1753 		size_t i = 0;
1754 		switch ( len ) { // load header byte
1755 			case 6:
1756 				c = in_cp[i] & _lead5_mask;
1757 				break;
1758 			case 5:
1759 				c = in_cp[i] & _lead4_mask;
1760 				break;
1761 			case 4:
1762 				c = in_cp[i] & _lead3_mask;
1763 				break;
1764 			case 3:
1765 				c = in_cp[i] & _lead2_mask;
1766 				break;
1767 			case 2:
1768 				c = in_cp[i] & _lead1_mask;
1769 				break;
1770 		}
1771 
1772 		// load each continuation byte
1773 		for ( ++i; i < len; i++ )
1774 		{
1775 			if (( in_cp[i] & ~_cont_mask ) != _cont )
1776 			{
1777 				//throw invalid_data( "bad UTF-8 continuation byte" );
1778 				out_uc = in_cp[0];
1779 				return 1;
1780 			}
1781 			c <<= 6;
1782 			c |= ( in_cp[i] & _cont_mask );
1783 		}
1784 
1785 		out_uc = c; // write the final value and return the used byte length
1786 		return len;
1787 	}
1788 
_utf32_to_utf8(const unicode_char & in_uc,unsigned char out_cp[6])1789 	size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
1790 	{
1791 		size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
1792 		unicode_char c = in_uc; // copy to temp buffer
1793 
1794 		//stuff all of the lower bits
1795 		for ( size_t i = len - 1; i > 0; i-- ) {
1796 			out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1797 			c >>= 6;
1798 		}
1799 
1800 		//now write the header byte
1801 		switch ( len ) {
1802 			case 6:
1803 				out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1804 				break;
1805 			case 5:
1806 				out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1807 				break;
1808 			case 4:
1809 				out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1810 				break;
1811 			case 3:
1812 				out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1813 				break;
1814 			case 2:
1815 				out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1816 				break;
1817 			case 1:
1818 			default:
1819 				out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
1820 				break;
1821 		}
1822 
1823 		// return the byte length of the sequence
1824 		return len;
1825 	}
1826 
_verifyUTF8(const unsigned char * c_str)1827 	UString::size_type UString::_verifyUTF8( const unsigned char* c_str )
1828 	{
1829 		std::string tmp( reinterpret_cast<const char*>( c_str ) );
1830 		return _verifyUTF8( tmp );
1831 	}
1832 
_verifyUTF8(const std::string & str)1833 	UString::size_type UString::_verifyUTF8( const std::string& str )
1834 	{
1835 		std::string::const_iterator i, ie = str.end();
1836 		i = str.begin();
1837 		size_type length = 0;
1838 
1839 		while ( i != ie ) {
1840 			// characters pass until we find an extended sequence
1841 			if (( *i ) & 0x80 ) {
1842 				unsigned char c = ( *i );
1843 				size_t contBytes = 0;
1844 
1845 				// get continuation byte count and test for overlong sequences
1846 				if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
1847 					if ( c == _lead1 )
1848 					{
1849 						//throw invalid_data( "overlong UTF-8 sequence" );
1850 						return str.size();
1851 					}
1852 					contBytes = 1;
1853 
1854 				} else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
1855 					contBytes = 2;
1856 					if ( c == _lead2 ) { // possible overlong UTF-8 sequence
1857 						c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1858 						if (( c & _lead2 ) == _cont )
1859 						{
1860 							//throw invalid_data( "overlong UTF-8 sequence" );
1861 							return str.size();
1862 						}
1863 					}
1864 
1865 				} else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
1866 					contBytes = 3;
1867 					if ( c == _lead3 ) { // possible overlong UTF-8 sequence
1868 						c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1869 						if (( c & _lead3 ) == _cont )
1870 						{
1871 							//throw invalid_data( "overlong UTF-8 sequence" );
1872 							return str.size();
1873 						}
1874 					}
1875 
1876 				} else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
1877 					contBytes = 4;
1878 					if ( c == _lead4 ) { // possible overlong UTF-8 sequence
1879 						c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1880 						if (( c & _lead4 ) == _cont )
1881 						{
1882 							//throw invalid_data( "overlong UTF-8 sequence" );
1883 							return str.size();
1884 						}
1885 					}
1886 
1887 				} else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
1888 					contBytes = 5;
1889 					if ( c == _lead5 ) { // possible overlong UTF-8 sequence
1890 						c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1891 						if (( c & _lead5 ) == _cont )
1892 						{
1893 							//throw invalid_data( "overlong UTF-8 sequence" );
1894 							return str.size();
1895 						}
1896 					}
1897 				}
1898 
1899 				// check remaining continuation bytes for
1900 				while ( contBytes-- ) {
1901 					c = ( *( ++i ) ); // get next byte in sequence
1902 					if (( c & ~_cont_mask ) != _cont )
1903 					{
1904 						//throw invalid_data( "bad UTF-8 continuation byte" );
1905 						return str.size();
1906 					}
1907 				}
1908 			}
1909 			length++;
1910 			i++;
1911 		}
1912 		return length;
1913 	}
1914 
_init()1915 	void UString::_init()
1916 	{
1917 		m_buffer.mVoidBuffer = nullptr;
1918 		m_bufferType = bt_none;
1919 		m_bufferSize = 0;
1920 	}
1921 
_cleanBuffer() const1922 	void UString::_cleanBuffer() const
1923 	{
1924 		if ( m_buffer.mVoidBuffer != nullptr ) {
1925 			switch ( m_bufferType ) {
1926 				case bt_string:
1927 					delete m_buffer.mStrBuffer;
1928 					break;
1929 				case bt_wstring:
1930 					delete m_buffer.mWStrBuffer;
1931 					break;
1932 				case bt_utf32string:
1933 					delete m_buffer.mUTF32StrBuffer;
1934 					break;
1935 				case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
1936 					//delete m_buffer.mVoidBuffer;
1937 					// delete void* is undefined, don't do that
1938 					assert("This should never happen - mVoidBuffer should never contain something if we "
1939 						"don't know the type");
1940 					break;
1941 			}
1942 			m_buffer.mVoidBuffer = nullptr;
1943 			m_bufferSize = 0;
1944 			m_bufferType = bt_none;
1945 		}
1946 	}
1947 
_getBufferStr() const1948 	void UString::_getBufferStr() const
1949 	{
1950 		if ( m_bufferType != bt_string ) {
1951 			_cleanBuffer();
1952 			m_buffer.mStrBuffer = new std::string();
1953 			m_bufferType = bt_string;
1954 		}
1955 		m_buffer.mStrBuffer->clear();
1956 	}
1957 
_getBufferWStr() const1958 	void UString::_getBufferWStr() const
1959 	{
1960 		if ( m_bufferType != bt_wstring ) {
1961 			_cleanBuffer();
1962 			m_buffer.mWStrBuffer = new std::wstring();
1963 			m_bufferType = bt_wstring;
1964 		}
1965 		m_buffer.mWStrBuffer->clear();
1966 	}
1967 
_getBufferUTF32Str() const1968 	void UString::_getBufferUTF32Str() const
1969 	{
1970 		if ( m_bufferType != bt_utf32string ) {
1971 			_cleanBuffer();
1972 			m_buffer.mUTF32StrBuffer = new utf32string();
1973 			m_bufferType = bt_utf32string;
1974 		}
1975 		m_buffer.mUTF32StrBuffer->clear();
1976 	}
1977 
_load_buffer_UTF8() const1978 	void UString::_load_buffer_UTF8() const
1979 	{
1980 		_getBufferStr();
1981 		std::string& buffer = ( *m_buffer.mStrBuffer );
1982 		buffer.reserve( length() );
1983 
1984 		unsigned char utf8buf[6];
1985 		char* charbuf = ( char* )utf8buf;
1986 		unicode_char c;
1987 		size_t len;
1988 
1989 		const_iterator i, ie = end();
1990 		for ( i = begin(); i != ie; i.moveNext() ) {
1991 			c = i.getCharacter();
1992 			len = _utf32_to_utf8( c, utf8buf );
1993 			size_t j = 0;
1994 			while ( j < len )
1995 				buffer.push_back( charbuf[j++] );
1996 		}
1997 	}
1998 
_load_buffer_WStr() const1999 	void UString::_load_buffer_WStr() const
2000 	{
2001 		_getBufferWStr();
2002 		std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2003 		buffer.reserve( length() ); // may over reserve, but should be close enough
2004 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
2005 		const_iterator i, ie = end();
2006 		for ( i = begin(); i != ie; ++i ) {
2007 			buffer.push_back(( wchar_t )( *i ) );
2008 		}
2009 #else // wchar_t fits UTF-32
2010 		unicode_char c;
2011 		const_iterator i, ie = end();
2012 		for ( i = begin(); i != ie; i.moveNext() ) {
2013 			c = i.getCharacter();
2014 			buffer.push_back(( wchar_t )c );
2015 		}
2016 #endif
2017 	}
2018 
_load_buffer_UTF32() const2019 	void UString::_load_buffer_UTF32() const
2020 	{
2021 		_getBufferUTF32Str();
2022 		utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2023 		buffer.reserve( length() ); // may over reserve, but should be close enough
2024 
2025 		unicode_char c;
2026 
2027 		const_iterator i, ie = end();
2028 		for ( i = begin(); i != ie; i.moveNext() ) {
2029 			c = i.getCharacter();
2030 			buffer.push_back( c );
2031 		}
2032 	}
2033 
2034 } // namespace MyGUI
2035