MyGUIEngine/src/MyGUI_UString.cpp

/*
 * This source file is part of MyGUI. For the latest info, see http://mygui.info/
 * Distributed under the MIT License
 * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT)
 */

#include "MyGUI_Precompiled.h"
#include "MyGUI_UString.h"

namespace MyGUI
{

	//--------------------------------------------------------------------------
	UString::_base_iterator::_base_iterator()
	{
		mString = nullptr;
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_seekFwd( size_type c )
	{
		mIter += c;
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_seekRev( size_type c )
	{
		mIter -= c;
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_become( const _base_iterator& i )
	{
		mIter = i.mIter;
		mString = i.mString;
	}
	//--------------------------------------------------------------------------
	bool UString::_base_iterator::_test_begin() const
	{
		return mIter == mString->mData.begin();
	}
	//--------------------------------------------------------------------------
	bool UString::_base_iterator::_test_end() const
	{
		return mIter == mString->mData.end();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::_base_iterator::_get_index() const
	{
		return mIter - mString->mData.begin();
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_jump_to( size_type index )
	{
		mIter = mString->mData.begin() + index;
	}
	//--------------------------------------------------------------------------
	UString::unicode_char UString::_base_iterator::_getCharacter() const
	{
		size_type current_index = _get_index();
		return mString->getChar( current_index );
	}
	//--------------------------------------------------------------------------
	int UString::_base_iterator::_setCharacter( unicode_char uc )
	{
		size_type current_index = _get_index();
		int change = mString->setChar( current_index, uc );
		_jump_to( current_index );
		return change;
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_moveNext()
	{
		_seekFwd( 1 ); // move 1 code point forward
		if ( _test_end() ) return; // exit if we hit the end
		if ( _utf16_surrogate_follow( mIter[0] ) ) {
			// landing on a follow code point means we might be part of a bigger character
			// so we test for that
			code_point lead_half = 0;
			//NB: we can't possibly be at the beginning here, so no need to test
			lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
			if ( _utf16_surrogate_lead( lead_half ) ) {
				_seekFwd( 1 ); // if so, then advance 1 more code point
			}
		}
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_movePrev()
	{
		_seekRev( 1 ); // move 1 code point backwards
		if ( _test_begin() ) return; // exit if we hit the beginning
		if ( _utf16_surrogate_follow( mIter[0] ) ) {
			// landing on a follow code point means we might be part of a bigger character
			// so we test for that
			code_point lead_half = 0;
			lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
			if ( _utf16_surrogate_lead( lead_half ) ) {
				_seekRev( 1 ); // if so, then rewind 1 more code point
			}
		}
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::_fwd_iterator::_fwd_iterator() = default;
	//--------------------------------------------------------------------------
	UString::_fwd_iterator::_fwd_iterator( const _fwd_iterator& i )
	{
		_become( i );
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator=( const _fwd_iterator& i )
	{
		_become( i );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator++()
	{
		_seekFwd( 1 );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator UString::_fwd_iterator::operator++( int )
	{
		_fwd_iterator tmp( *this );
		_seekFwd( 1 );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator--()
	{
		_seekRev( 1 );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator UString::_fwd_iterator::operator--( int )
	{
		_fwd_iterator tmp( *this );
		_seekRev( 1 );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator UString::_fwd_iterator::operator+( difference_type n )
	{
		_fwd_iterator tmp( *this );
		if ( n < 0 )
			tmp._seekRev( -n );
		else
			tmp._seekFwd( n );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator UString::_fwd_iterator::operator-( difference_type n )
	{
		_fwd_iterator tmp( *this );
		if ( n < 0 )
			tmp._seekFwd( -n );
		else
			tmp._seekRev( n );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator+=( difference_type n )
	{
		if ( n < 0 )
			_seekRev( -n );
		else
			_seekFwd( n );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator-=( difference_type n )
	{
		if ( n < 0 )
			_seekFwd( -n );
		else
			_seekRev( n );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::value_type& UString::_fwd_iterator::operator*() const
	{
		return *mIter;
	}
	//--------------------------------------------------------------------------
	UString::value_type& UString::_fwd_iterator::operator[]( difference_type n ) const
	{
		_fwd_iterator tmp( *this );
		tmp += n;
		return *tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::moveNext()
	{
		_moveNext();
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::movePrev()
	{
		_movePrev();
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::unicode_char UString::_fwd_iterator::getCharacter() const
	{
		return _getCharacter();
	}
	//--------------------------------------------------------------------------
	int UString::_fwd_iterator::setCharacter( unicode_char uc )
	{
		return _setCharacter( uc );
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator::_const_fwd_iterator() = default;
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator::_const_fwd_iterator( const _const_fwd_iterator& i )
	{
		_become( i );
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator=( const _const_fwd_iterator& i )
	{
		_become( i );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator::_const_fwd_iterator( const _fwd_iterator& i )
	{
		_become( i );
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator++()
	{
		_seekFwd( 1 );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator++( int )
	{
		_const_fwd_iterator tmp( *this );
		_seekFwd( 1 );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator--()
	{
		_seekRev( 1 );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator--( int )
	{
		_const_fwd_iterator tmp( *this );
		_seekRev( 1 );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator+( difference_type n )
	{
		_const_fwd_iterator tmp( *this );
		if ( n < 0 )
			tmp._seekRev( -n );
		else
			tmp._seekFwd( n );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator-( difference_type n )
	{
		_const_fwd_iterator tmp( *this );
		if ( n < 0 )
			tmp._seekFwd( -n );
		else
			tmp._seekRev( n );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator+=( difference_type n )
	{
		if ( n < 0 )
			_seekRev( -n );
		else
			_seekFwd( n );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator-=( difference_type n )
	{
		if ( n < 0 )
			_seekFwd( -n );
		else
			_seekRev( n );
		return *this;
	}
	//--------------------------------------------------------------------------
	const UString::value_type& UString::_const_fwd_iterator::operator*() const
	{
		return *mIter;
	}
	//--------------------------------------------------------------------------
	const UString::value_type& UString::_const_fwd_iterator::operator[]( difference_type n ) const
	{
		_const_fwd_iterator tmp( *this );
		tmp += n;
		return *tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::moveNext()
	{
		_moveNext();
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::movePrev()
	{
		_movePrev();
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::unicode_char UString::_const_fwd_iterator::getCharacter() const
	{
		return _getCharacter();
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::_rev_iterator::_rev_iterator() = default;
	//--------------------------------------------------------------------------
	UString::_rev_iterator::_rev_iterator( const _rev_iterator& i )
	{
		_become( i );
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator& UString::_rev_iterator::operator++()
	{
		_seekRev( 1 );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator UString::_rev_iterator::operator++( int )
	{
		_rev_iterator tmp( *this );
		_seekRev( 1 );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator& UString::_rev_iterator::operator--()
	{
		_seekFwd( 1 );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator UString::_rev_iterator::operator--( int )
	{
		_rev_iterator tmp( *this );
		_seekFwd( 1 );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator UString::_rev_iterator::operator+( difference_type n )
	{
		_rev_iterator tmp( *this );
		if ( n < 0 )
			tmp._seekFwd( -n );
		else
			tmp._seekRev( n );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator UString::_rev_iterator::operator-( difference_type n )
	{
		_rev_iterator tmp( *this );
		if ( n < 0 )
			tmp._seekRev( -n );
		else
			tmp._seekFwd( n );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator& UString::_rev_iterator::operator+=( difference_type n )
	{
		if ( n < 0 )
			_seekFwd( -n );
		else
			_seekRev( n );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator& UString::_rev_iterator::operator-=( difference_type n )
	{
		if ( n < 0 )
			_seekRev( -n );
		else
			_seekFwd( n );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::value_type& UString::_rev_iterator::operator*() const
	{
		return mIter[-1];
	}
	//--------------------------------------------------------------------------
	UString::value_type& UString::_rev_iterator::operator[]( difference_type n ) const
	{
		_rev_iterator tmp( *this );
		tmp -= n;
		return *tmp;
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator::_const_rev_iterator() = default;
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator::_const_rev_iterator( const _const_rev_iterator& i )
	{
		_become( i );
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator::_const_rev_iterator( const _rev_iterator& i )
	{
		_become( i );
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator& UString::_const_rev_iterator::operator++()
	{
		_seekRev( 1 );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator UString::_const_rev_iterator::operator++( int )
	{
		_const_rev_iterator tmp( *this );
		_seekRev( 1 );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator& UString::_const_rev_iterator::operator--()
	{
		_seekFwd( 1 );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator UString::_const_rev_iterator::operator--( int )
	{
		_const_rev_iterator tmp( *this );
		_seekFwd( 1 );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator UString::_const_rev_iterator::operator+( difference_type n )
	{
		_const_rev_iterator tmp( *this );
		if ( n < 0 )
			tmp._seekFwd( -n );
		else
			tmp._seekRev( n );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator UString::_const_rev_iterator::operator-( difference_type n )
	{
		_const_rev_iterator tmp( *this );
		if ( n < 0 )
			tmp._seekRev( -n );
		else
			tmp._seekFwd( n );
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator& UString::_const_rev_iterator::operator+=( difference_type n )
	{
		if ( n < 0 )
			_seekFwd( -n );
		else
			_seekRev( n );
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator& UString::_const_rev_iterator::operator-=( difference_type n )
	{
		if ( n < 0 )
			_seekRev( -n );
		else
			_seekFwd( n );
		return *this;
	}
	//--------------------------------------------------------------------------
	const UString::value_type& UString::_const_rev_iterator::operator*() const
	{
		return mIter[-1];
	}
	//--------------------------------------------------------------------------
	const UString::value_type& UString::_const_rev_iterator::operator[]( difference_type n ) const
	{
		_const_rev_iterator tmp( *this );
		tmp -= n;
		return *tmp;
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::UString()
	{
		_init();
	}
	//--------------------------------------------------------------------------
	UString::UString( const UString& copy )
	{
		_init();
		mData = copy.mData;
	}
	//--------------------------------------------------------------------------
	UString::UString( size_type length, const code_point& ch )
	{
		_init();
		assign( length, ch );
	}
	//--------------------------------------------------------------------------
	UString::UString( const code_point* str )
	{
		_init();
		assign( str );
	}
	//--------------------------------------------------------------------------
	UString::UString( const code_point* str, size_type length )
	{
		_init();
		assign( str, length );
	}
	//--------------------------------------------------------------------------
	UString::UString( const UString& str, size_type index, size_type length )
	{
		_init();
		assign( str, index, length );
	}
	//--------------------------------------------------------------------------
#if MYGUI_IS_NATIVE_WCHAR_T
	UString::UString( const wchar_t* w_str )
	{
		_init();
		assign( w_str );
	}
	//--------------------------------------------------------------------------
	UString::UString( const wchar_t* w_str, size_type length )
	{
		_init();
		assign( w_str, length );
	}
#endif
	//--------------------------------------------------------------------------
	UString::UString( const std::wstring& wstr )
	{
		_init();
		assign( wstr );
	}
	//--------------------------------------------------------------------------
	UString::UString( const char* c_str )
	{
		_init();
		assign( c_str );
	}
	//--------------------------------------------------------------------------
	UString::UString( const char* c_str, size_type length )
	{
		_init();
		assign( c_str, length );
	}
	//--------------------------------------------------------------------------
	UString::UString( const std::string& str )
	{
		_init();
		assign( str );
	}
	//--------------------------------------------------------------------------
	UString::~UString()
	{
		_cleanBuffer();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::size() const
	{
		return mData.size();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::length() const
	{
		return size();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::length_Characters() const
	{
		const_iterator i = begin(), ie = end();
		size_type c = 0;
		while ( i != ie ) {
			i.moveNext();
			++c;
		}
		return c;
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::max_size() const
	{
		return mData.max_size();
	}
	//--------------------------------------------------------------------------
	void UString::reserve( size_type size )
	{
		mData.reserve( size );
	}
	//--------------------------------------------------------------------------
	void UString::resize( size_type num, const code_point& val /*= 0 */ )
	{
		mData.resize( num, val );
	}
	//--------------------------------------------------------------------------
	void UString::swap( UString& from )
	{
		mData.swap( from.mData );
	}
	//--------------------------------------------------------------------------
	bool UString::empty() const
	{
		return mData.empty();
	}
	//--------------------------------------------------------------------------
	const UString::code_point* UString::c_str() const
	{
		return mData.c_str();
	}
	//--------------------------------------------------------------------------
	const UString::code_point* UString::data() const
	{
		return c_str();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::capacity() const
	{
		return mData.capacity();
	}
	//--------------------------------------------------------------------------
	void UString::clear()
	{
		mData.clear();
	}
	//--------------------------------------------------------------------------
	UString UString::substr( size_type index, size_type num /*= npos */ ) const
	{
		// this could avoid the extra copy if we used a private specialty constructor
		dstring data = mData.substr( index, num );
		UString tmp;
		tmp.mData.swap( data );
		return tmp;
	}
	//--------------------------------------------------------------------------
	void UString::push_back( unicode_char val )
	{
		code_point cp[2];
		size_t c = _utf32_to_utf16( val, cp );
		if ( c > 0 ) push_back( cp[0] );
		if ( c > 1 ) push_back( cp[1] );
	}
	//--------------------------------------------------------------------------
#if MYGUI_IS_NATIVE_WCHAR_T
	void UString::push_back( wchar_t val )
	{
		// we do this because the Unicode method still preserves UTF-16 code points
		mData.push_back( static_cast<code_point>( val ) );
	}
#endif
	//--------------------------------------------------------------------------
	void UString::push_back( code_point val )
	{
		mData.push_back( val );
	}

	void UString::push_back( char val )
	{
		mData.push_back( static_cast<code_point>( val ) );
	}

	bool UString::inString( unicode_char ch ) const
	{
		const_iterator i, ie = end();
		for ( i = begin(); i != ie; i.moveNext() ) {
			if ( i.getCharacter() == ch )
				return true;
		}
		return false;
	}

	const std::string& UString::asUTF8() const
	{
		_load_buffer_UTF8();
		return *m_buffer.mStrBuffer;
	}

	const char* UString::asUTF8_c_str() const
	{
		_load_buffer_UTF8();
		return m_buffer.mStrBuffer->c_str();
	}

	const UString::utf32string& UString::asUTF32() const
	{
		_load_buffer_UTF32();
		return *m_buffer.mUTF32StrBuffer;
	}

	const UString::unicode_char* UString::asUTF32_c_str() const
	{
		_load_buffer_UTF32();
		return m_buffer.mUTF32StrBuffer->c_str();
	}

	const std::wstring& UString::asWStr() const
	{
		_load_buffer_WStr();
		return *m_buffer.mWStrBuffer;
	}

	const wchar_t* UString::asWStr_c_str() const
	{
		_load_buffer_WStr();
		return m_buffer.mWStrBuffer->c_str();
	}

	UString::code_point& UString::at( size_type loc )
	{
		return mData.at( loc );
	}

	const UString::code_point& UString::at( size_type loc ) const
	{
		return mData.at( loc );
	}

	UString::unicode_char UString::getChar( size_type loc ) const
	{
		const code_point* ptr = c_str();
		unicode_char uc;
		size_t l = _utf16_char_length( ptr[loc] );
		code_point cp[2] = { /* blame the code beautifier */
			0, 0
		};
		cp[0] = ptr[loc];

		if ( l == 2 && ( loc + 1 ) < mData.length() ) {
			cp[1] = ptr[loc+1];
		}
		_utf16_to_utf32( cp, uc );
		return uc;
	}

	int UString::setChar( size_type loc, unicode_char ch )
	{
		code_point cp[2] = { /* blame the code beautifier */
			0, 0
		};
		size_t l = _utf32_to_utf16( ch, cp );
		unicode_char existingChar = getChar( loc );
		size_t existingSize = _utf16_char_length( existingChar );
		size_t newSize = _utf16_char_length( ch );

		if ( newSize > existingSize ) {
			at( loc ) = cp[0];
			insert( loc + 1, 1, cp[1] );
			return 1;
		}
		if ( newSize < existingSize ) {
			erase( loc, 1 );
			at( loc ) = cp[0];
			return -1;
		}

		// newSize == existingSize
		at( loc ) = cp[0];
		if ( l == 2 ) at( loc + 1 ) = cp[1];
		return 0;
	}

	UString::iterator UString::begin()
	{
		iterator i;
		i.mIter = mData.begin();
		i.mString = this;
		return i;
	}

	UString::const_iterator UString::begin() const
	{
		const_iterator i;
		i.mIter = const_cast<UString*>( this )->mData.begin();
		i.mString = const_cast<UString*>( this );
		return i;
	}

	UString::iterator UString::end()
	{
		iterator i;
		i.mIter = mData.end();
		i.mString = this;
		return i;
	}

	UString::const_iterator UString::end() const
	{
		const_iterator i;
		i.mIter = const_cast<UString*>( this )->mData.end();
		i.mString = const_cast<UString*>( this );
		return i;
	}

	UString::reverse_iterator UString::rbegin()
	{
		reverse_iterator i;
		i.mIter = mData.end();
		i.mString = this;
		return i;
	}

	UString::const_reverse_iterator UString::rbegin() const
	{
		const_reverse_iterator i;
		i.mIter = const_cast<UString*>( this )->mData.end();
		i.mString = const_cast<UString*>( this );
		return i;
	}

	UString::reverse_iterator UString::rend()
	{
		reverse_iterator i;
		i.mIter = mData.begin();
		i.mString = this;
		return i;
	}

	UString::const_reverse_iterator UString::rend() const
	{
		const_reverse_iterator i;
		i.mIter = const_cast<UString*>( this )->mData.begin();
		i.mString = const_cast<UString*>( this );
		return i;
	}

	UString& UString::assign( iterator start, iterator end )
	{
		mData.assign( start.mIter, end.mIter );
		return *this;
	}

	UString& UString::assign( const UString& str )
	{
		mData.assign( str.mData );
		return *this;
	}

	UString& UString::assign( const code_point* str )
	{
		mData.assign( str );
		return *this;
	}

	UString& UString::assign( const code_point* str, size_type num )
	{
		mData.assign( str, num );
		return *this;
	}

	UString& UString::assign( const UString& str, size_type index, size_type len )
	{
		mData.assign( str.mData, index, len );
		return *this;
	}

	UString& UString::assign( size_type num, const code_point& ch )
	{
		mData.assign( num, ch );
		return *this;
	}

	UString& UString::assign( const std::wstring& wstr )
	{
		mData.clear();
		mData.reserve( wstr.length() ); // best guess bulk allocate
#ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
		code_point tmp;
		std::wstring::const_iterator i, ie = wstr.end();
		for ( i = wstr.begin(); i != ie; i++ ) {
			tmp = static_cast<code_point>( *i );
			mData.push_back( tmp );
		}
#else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
		code_point cp[3] = {0, 0, 0};
		unicode_char tmp;
		std::wstring::const_iterator i, ie = wstr.end();
		for ( i = wstr.begin(); i != ie; i++ ) {
			tmp = static_cast<unicode_char>( *i );
			size_t l = _utf32_to_utf16( tmp, cp );
			if ( l > 0 ) mData.push_back( cp[0] );
			if ( l > 1 ) mData.push_back( cp[1] );
		}
#endif
		return *this;
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::assign( const wchar_t* w_str )
	{
		std::wstring tmp;
		tmp.assign( w_str );
		return assign( tmp );
	}

	UString& UString::assign( const wchar_t* w_str, size_type num )
	{
		std::wstring tmp;
		tmp.assign( w_str, num );
		return assign( tmp );
	}
#endif

	UString& UString::assign( const std::string& str )
	{
		size_type len = _verifyUTF8( str );
		clear(); // empty our contents, if there are any
		reserve( len ); // best guess bulk capacity growth

		// This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
		// then converting it to UTF-16, then finally appending the data buffer

		unicode_char uc;          // temporary Unicode character buffer
		unsigned char utf8buf[7]; // temporary UTF-8 buffer
		utf8buf[6] = 0;
		size_t utf8len;           // UTF-8 length
		code_point utf16buff[3];  // temporary UTF-16 buffer
		utf16buff[2] = 0;
		size_t utf16len;          // UTF-16 length

		std::string::const_iterator i, ie = str.end();
		for ( i = str.begin(); i != ie; i++ ) {
			utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
			for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
				utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
			}
			utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
			utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
			i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop

			utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
			append( utf16buff, utf16len ); // append the characters to the string
		}
		return *this;
	}

	UString& UString::assign( const char* c_str )
	{
		std::string tmp( c_str );
		return assign( tmp );
	}

	UString& UString::assign( const char* c_str, size_type num )
	{
		std::string tmp;
		tmp.assign( c_str, num );
		return assign( tmp );
	}

	UString& UString::append( const UString& str )
	{
		mData.append( str.mData );
		return *this;
	}

	UString& UString::append( const code_point* str )
	{
		mData.append( str );
		return *this;
	}

	UString& UString::append( const UString& str, size_type index, size_type len )
	{
		mData.append( str.mData, index, len );
		return *this;
	}

	UString& UString::append( const code_point* str, size_type num )
	{
		mData.append( str, num );
		return *this;
	}

	UString& UString::append( size_type num, code_point ch )
	{
		mData.append( num, ch );
		return *this;
	}

	UString& UString::append( iterator start, iterator end )
	{
		mData.append( start.mIter, end.mIter );
		return *this;
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::append( const wchar_t* w_str, size_type num )
	{
		std::wstring tmp( w_str, num );
		return append( tmp );
	}

	UString& UString::append( size_type num, wchar_t ch )
	{
		return append( num, static_cast<unicode_char>( ch ) );
	}
#endif
	UString& UString::append( const char* c_str, size_type num )
	{
		UString tmp( c_str, num );
		append( tmp );
		return *this;
	}

	UString& UString::append( size_type num, char ch )
	{
		append( num, static_cast<code_point>( ch ) );
		return *this;
	}

	UString& UString::append( size_type num, unicode_char ch )
	{
		code_point cp[2] = {0, 0};
		if ( _utf32_to_utf16( ch, cp ) == 2 ) {
			for ( size_type i = 0; i < num; i++ ) {
				append( 1, cp[0] );
				append( 1, cp[1] );
			}
		} else {
			for ( size_type i = 0; i < num; i++ ) {
				append( 1, cp[0] );
			}
		}
		return *this;
	}

	UString::iterator UString::insert( iterator i, const code_point& ch )
	{
		iterator ret;
		ret.mIter = mData.insert( i.mIter, ch );
		ret.mString = this;
		return ret;
	}

	UString& UString::insert( size_type index, const UString& str )
	{
		mData.insert( index, str.mData );
		return *this;
	}

	UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num )
	{
		mData.insert( index1, str.mData, index2, num );
		return *this;
	}

	void UString::insert( iterator i, iterator start, iterator end )
	{
		mData.insert( i.mIter, start.mIter, end.mIter );
	}

	UString& UString::insert( size_type index, const code_point* str, size_type num )
	{
		mData.insert( index, str, num );
		return *this;
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
	{
		UString tmp( w_str, num );
		insert( index, tmp );
		return *this;
	}
#endif

	UString& UString::insert( size_type index, const char* c_str, size_type num )
	{
		UString tmp( c_str, num );
		insert( index, tmp );
		return *this;
	}

	UString& UString::insert( size_type index, size_type num, code_point ch )
	{
		mData.insert( index, num, ch );
		return *this;
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::insert( size_type index, size_type num, wchar_t ch )
	{
		insert( index, num, static_cast<unicode_char>( ch ) );
		return *this;
	}
#endif

	UString& UString::insert( size_type index, size_type num, char ch )
	{
		insert( index, num, static_cast<code_point>( ch ) );
		return *this;
	}

	UString& UString::insert( size_type index, size_type num, unicode_char ch )
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16( ch, cp );
		if ( l == 1 ) {
			return insert( index, num, cp[0] );
		}
		for ( size_type c = 0; c < num; c++ ) {
			// insert in reverse order to preserve ordering after insert
			insert( index, 1, cp[1] );
			insert( index, 1, cp[0] );
		}
		return *this;
	}

	void UString::insert( iterator i, size_type num, const code_point& ch )
	{
		mData.insert( i.mIter, num, ch );
	}
#if MYGUI_IS_NATIVE_WCHAR_T
	void UString::insert( iterator i, size_type num, const wchar_t& ch )
	{
		insert( i, num, static_cast<unicode_char>( ch ) );
	}
#endif

	void UString::insert( iterator i, size_type num, const char& ch )
	{
		insert( i, num, static_cast<code_point>( ch ) );
	}

	void UString::insert( iterator i, size_type num, const unicode_char& ch )
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16( ch, cp );
		if ( l == 1 ) {
			insert( i, num, cp[0] );
		} else {
			for ( size_type c = 0; c < num; c++ ) {
				// insert in reverse order to preserve ordering after insert
				insert( i, 1, cp[1] );
				insert( i, 1, cp[0] );
			}
		}
	}

	UString::iterator UString::erase( iterator loc )
	{
		iterator ret;
		ret.mIter = mData.erase( loc.mIter );
		ret.mString = this;
		return ret;
	}

	UString::iterator UString::erase( iterator start, iterator end )
	{
		iterator ret;
		ret.mIter = mData.erase( start.mIter, end.mIter );
		ret.mString = this;
		return ret;
	}

	UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ )
	{
		if ( num == npos )
			mData.erase( index );
		else
			mData.erase( index, num );
		return *this;
	}

	UString& UString::replace( size_type index1, size_type num1, const UString& str )
	{
		mData.replace( index1, num1, str.mData, 0, npos );
		return *this;
	}

	UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type num2 )
	{
		mData.replace( index1, num1, str.mData, 0, num2 );
		return *this;
	}

	UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
	{
		mData.replace( index1, num1, str.mData, index2, num2 );
		return *this;
	}

	UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ )
	{
		_const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload

		size_type index1 = begin() - st;
		size_type num1 = end - st;
		return replace( index1, num1, str, 0, num );
	}

	UString& UString::replace( size_type index, size_type num1, size_type num2, code_point ch )
	{
		mData.replace( index, num1, num2, ch );
		return *this;
	}

	UString& UString::replace( iterator start, iterator end, size_type num, code_point ch )
	{
		_const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload

		size_type index1 = begin() - st;
		size_type num1 = end - st;
		return replace( index1, num1, num, ch );
	}

	int UString::compare( const UString& str ) const
	{
		return mData.compare( str.mData );
	}

	int UString::compare( const code_point* str ) const
	{
		return mData.compare( str );
	}

	int UString::compare( size_type index, size_type length, const UString& str ) const
	{
		return mData.compare( index, length, str.mData );
	}

	int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
	{
		return mData.compare( index, length, str.mData, index2, length2 );
	}

	int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
	{
		return mData.compare( index, length, str, length2 );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
	{
		UString tmp( w_str, length2 );
		return compare( index, length, tmp );
	}
#endif

	int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
	{
		UString tmp( c_str, length2 );
		return compare( index, length, tmp );
	}

	UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const
	{
		return mData.find( str.c_str(), index );
	}

	UString::size_type UString::find( const code_point* cp_str, size_type index, size_type length ) const
	{
		UString tmp( cp_str );
		return mData.find( tmp.c_str(), index, length );
	}

	UString::size_type UString::find( const char* c_str, size_type index, size_type length ) const
	{
		UString tmp( c_str );
		return mData.find( tmp.c_str(), index, length );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
	{
		UString tmp( w_str );
		return mData.find( tmp.c_str(), index, length );
	}
#endif

	UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const
	{
		return find( static_cast<code_point>( ch ), index );
	}

	UString::size_type UString::find( code_point ch, size_type index /*= 0 */ ) const
	{
		return mData.find( ch, index );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const
	{
		return find( static_cast<unicode_char>( ch ), index );
	}
#endif

	UString::size_type UString::find( unicode_char ch, size_type index /*= 0 */ ) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16( ch, cp );
		return find( UString( cp, l ), index );
	}

	UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const
	{
		return mData.rfind( str.c_str(), index );
	}

	UString::size_type UString::rfind( const code_point* cp_str, size_type index, size_type num ) const
	{
		UString tmp( cp_str );
		return mData.rfind( tmp.c_str(), index, num );
	}

	UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const
	{
		UString tmp( c_str );
		return mData.rfind( tmp.c_str(), index, num );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
	{
		UString tmp( w_str );
		return mData.rfind( tmp.c_str(), index, num );
	}
#endif

	UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const
	{
		return rfind( static_cast<code_point>( ch ), index );
	}

	UString::size_type UString::rfind( code_point ch, size_type index ) const
	{
		return mData.rfind( ch, index );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const
	{
		return rfind( static_cast<unicode_char>( ch ), index );
	}
#endif

	UString::size_type UString::rfind( unicode_char ch, size_type index /*= 0 */ ) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16( ch, cp );
		return rfind( UString( cp, l ), index );
	}

	UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const
	{
		size_type i = 0;
		const size_type len = length();
		while ( i < num && ( index + i ) < len ) {
			unicode_char ch = getChar( index + i );
			if ( str.inString( ch ) )
				return index + i;
			i += _utf16_char_length( ch ); // increment by the Unicode character length
		}
		return npos;
	}

	UString::size_type UString::find_first_of( code_point ch, size_type index /*= 0 */ ) const
	{
		UString tmp;
		tmp.assign( 1, ch );
		return find_first_of( tmp, index );
	}

	UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const
	{
		return find_first_of( static_cast<code_point>( ch ), index );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const
	{
		return find_first_of( static_cast<unicode_char>( ch ), index );
	}
#endif

	UString::size_type UString::find_first_of( unicode_char ch, size_type index /*= 0 */ ) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16( ch, cp );
		return find_first_of( UString( cp, l ), index );
	}

	UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const
	{
		size_type i = 0;
		const size_type len = length();
		while ( i < num && ( index + i ) < len ) {
			unicode_char ch = getChar( index + i );
			if ( !str.inString( ch ) )
				return index + i;
			i += _utf16_char_length( ch ); // increment by the Unicode character length
		}
		return npos;
	}

	UString::size_type UString::find_first_not_of( code_point ch, size_type index /*= 0 */ ) const
	{
		UString tmp;
		tmp.assign( 1, ch );
		return find_first_not_of( tmp, index );
	}

	UString::size_type UString::find_first_not_of( char ch, size_type index /*= 0 */ ) const
	{
		return find_first_not_of( static_cast<code_point>( ch ), index );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const
	{
		return find_first_not_of( static_cast<unicode_char>( ch ), index );
	}
#endif

	UString::size_type UString::find_first_not_of( unicode_char ch, size_type index /*= 0 */ ) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16( ch, cp );
		return find_first_not_of( UString( cp, l ), index );
	}

	UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
	{
		size_type i = 0;
		const size_type len = length();
		if ( index > len ) index = len - 1;

		while ( i < num && ( index - i ) != npos ) {
			size_type j = index - i;
			// careful to step full Unicode characters
			if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
				j = index - ++i;
			}
			// and back to the usual dull test
			unicode_char ch = getChar( j );
			if ( str.inString( ch ) )
				return j;
			i++;
		}
		return npos;
	}

	UString::size_type UString::find_last_of( code_point ch, size_type index /*= npos */ ) const
	{
		UString tmp;
		tmp.assign( 1, ch );
		return find_last_of( tmp, index );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const
	{
		return find_last_of( static_cast<unicode_char>( ch ), index );
	}
#endif

	UString::size_type UString::find_last_of( unicode_char ch, size_type index /*= npos */ ) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16( ch, cp );
		return find_last_of( UString( cp, l ), index );
	}

	UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
	{
		size_type i = 0;
		const size_type len = length();
		if ( index > len ) index = len - 1;

		while ( i < num && ( index - i ) != npos ) {
			size_type j = index - i;
			// careful to step full Unicode characters
			if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
				j = index - ++i;
			}
			// and back to the usual dull test
			unicode_char ch = getChar( j );
			if ( !str.inString( ch ) )
				return j;
			i++;
		}
		return npos;
	}

	UString::size_type UString::find_last_not_of( code_point ch, size_type index /*= npos */ ) const
	{
		UString tmp;
		tmp.assign( 1, ch );
		return find_last_not_of( tmp, index );
	}

	UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const
	{
		return find_last_not_of( static_cast<code_point>( ch ), index );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const
	{
		return find_last_not_of( static_cast<unicode_char>( ch ), index );
	}
#endif

	UString::size_type UString::find_last_not_of( unicode_char ch, size_type index /*= npos */ ) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16( ch, cp );
		return find_last_not_of( UString( cp, l ), index );
	}

	bool UString::operator<( const UString& right ) const
	{
		return compare( right ) < 0;
	}

	bool UString::operator<=( const UString& right ) const
	{
		return compare( right ) <= 0;
	}

	UString& UString::operator=( const UString& s )
	{
		return assign( s );
	}

	UString& UString::operator=( code_point ch )
	{
		clear();
		return append( 1, ch );
	}

	UString& UString::operator=( char ch )
	{
		clear();
		return append( 1, ch );
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::operator=( wchar_t ch )
	{
		clear();
		return append( 1, ch );
	}
#endif

	UString& UString::operator=( unicode_char ch )
	{
		clear();
		return append( 1, ch );
	}

	bool UString::operator>( const UString& right ) const
	{
		return compare( right ) > 0;
	}

	bool UString::operator>=( const UString& right ) const
	{
		return compare( right ) >= 0;
	}

	bool UString::operator==( const UString& right ) const
	{
		return compare( right ) == 0;
	}

	bool UString::operator!=( const UString& right ) const
	{
		return !operator==( right );
	}

	UString::code_point& UString::operator[]( size_type index )
	{
		return at( index );
	}

	const UString::code_point& UString::operator[]( size_type index ) const
	{
		return at( index );
	}

	UString::operator std::string() const
	{
		return std::string( asUTF8() );
	}

	//! implicit cast to std::wstring
	UString::operator std::wstring() const
	{
		return std::wstring( asWStr() );
	}


	bool UString::_utf16_independent_char( code_point cp )
	{
		if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
			return false; // it matches a surrogate pair signature
		return true; // everything else is a standalone code point
	}

	bool UString::_utf16_surrogate_lead( code_point cp )
	{
		if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
			return true; // it is a 1st word
		return false; // it isn't
	}

	bool UString::_utf16_surrogate_follow( code_point cp )
	{
		if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
			return true; // it is a 2nd word
		return false; // everything else isn't
	}

	size_t UString::_utf16_char_length( code_point cp )
	{
		if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
			return 2; // if it is, then we are 2 words long
		return 1; // otherwise we are only 1 word long
	}

	size_t UString::_utf16_char_length( unicode_char uc )
	{
		if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
			return 2; // if so, we need a surrogate pair
		return 1; // otherwise we can stuff it into a single word
	}

	size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
	{
		const code_point& cp1 = in_cp[0];
		const code_point& cp2 = in_cp[1];
		bool wordPair = false;

		// does it look like a surrogate pair?
		if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
			// looks like one, but does the other half match the algorithm as well?
			if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
				wordPair = true; // yep!
		}

		if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
			out_uc = cp1;
			return 1;
		}

		unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
		cU -= 0xD800; // remove the encoding markers
		cL -= 0xDC00;

		out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
		out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
		out_uc += 0x10000; // add back in the value offset

		return 2; // this whole operation takes to words, so that's what we'll return
	}

	size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
	{
		if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
			out_cp[0] = static_cast<code_point>(in_uc);
			return 1;
		}
		unicode_char uc = in_uc; // copy to writable buffer
		unsigned short tmp; // single code point buffer
		uc -= 0x10000; // subtract value offset

		//process upper word
		tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits
		tmp += 0xD800; // add encoding offset
		out_cp[0] = tmp; // write

		// process lower word
		tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
		tmp += 0xDC00; // add encoding offset
		out_cp[1] = tmp; // write

		return 2; // return used word count (2 for surrogate pairs)
	}

	bool UString::_utf8_start_char( unsigned char cp )
	{
		return ( cp & ~_cont_mask ) != _cont;
	}

	size_t UString::_utf8_char_length( unsigned char cp )
	{
		if ( !( cp & 0x80 ) ) return 1;
		if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
		if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
		if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
		if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
		if (( cp & ~_lead5_mask ) == _lead5 ) return 6;

		return 1;
		//throw invalid_data( "invalid UTF-8 sequence header value" );
	}

	size_t UString::_utf8_char_length( unicode_char uc )
	{
		/*
		7 bit:  U-00000000 - U-0000007F: 0xxxxxxx
		11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
		16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
		21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
		31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
		*/
		if ( !( uc & ~0x0000007F ) ) return 1;
		if ( !( uc & ~0x000007FF ) ) return 2;
		if ( !( uc & ~0x0000FFFF ) ) return 3;
		if ( !( uc & ~0x001FFFFF ) ) return 4;
		if ( !( uc & ~0x03FFFFFF ) ) return 5;
		if ( !( uc & ~0x7FFFFFFF ) ) return 6;

		return 1;
		//throw invalid_data( "invalid UTF-32 value" );
	}

	size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
	{
		size_t len = _utf8_char_length( in_cp[0] );
		if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
			out_uc = in_cp[0];
			return 1;
		}

		unicode_char c = 0; // temporary buffer
		size_t i = 0;
		switch ( len ) { // load header byte
			case 6:
				c = in_cp[i] & _lead5_mask;
				break;
			case 5:
				c = in_cp[i] & _lead4_mask;
				break;
			case 4:
				c = in_cp[i] & _lead3_mask;
				break;
			case 3:
				c = in_cp[i] & _lead2_mask;
				break;
			case 2:
				c = in_cp[i] & _lead1_mask;
				break;
		}

		// load each continuation byte
		for ( ++i; i < len; i++ )
		{
			if (( in_cp[i] & ~_cont_mask ) != _cont )
			{
				//throw invalid_data( "bad UTF-8 continuation byte" );
				out_uc = in_cp[0];
				return 1;
			}
			c <<= 6;
			c |= ( in_cp[i] & _cont_mask );
		}

		out_uc = c; // write the final value and return the used byte length
		return len;
	}

	size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
	{
		size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
		unicode_char c = in_uc; // copy to temp buffer

		//stuff all of the lower bits
		for ( size_t i = len - 1; i > 0; i-- ) {
			out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
			c >>= 6;
		}

		//now write the header byte
		switch ( len ) {
			case 6:
				out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
				break;
			case 5:
				out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
				break;
			case 4:
				out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
				break;
			case 3:
				out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
				break;
			case 2:
				out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
				break;
			case 1:
			default:
				out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
				break;
		}

		// return the byte length of the sequence
		return len;
	}

	UString::size_type UString::_verifyUTF8( const unsigned char* c_str )
	{
		std::string tmp( reinterpret_cast<const char*>( c_str ) );
		return _verifyUTF8( tmp );
	}

	UString::size_type UString::_verifyUTF8( const std::string& str )
	{
		std::string::const_iterator i, ie = str.end();
		i = str.begin();
		size_type length = 0;

		while ( i != ie ) {
			// characters pass until we find an extended sequence
			if (( *i ) & 0x80 ) {
				unsigned char c = ( *i );
				size_t contBytes = 0;

				// get continuation byte count and test for overlong sequences
				if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
					if ( c == _lead1 )
					{
						//throw invalid_data( "overlong UTF-8 sequence" );
						return str.size();
					}
					contBytes = 1;

				} else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
					contBytes = 2;
					if ( c == _lead2 ) { // possible overlong UTF-8 sequence
						c = ( *( i + 1 ) ); // look ahead to next byte in sequence
						if (( c & _lead2 ) == _cont )
						{
							//throw invalid_data( "overlong UTF-8 sequence" );
							return str.size();
						}
					}

				} else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
					contBytes = 3;
					if ( c == _lead3 ) { // possible overlong UTF-8 sequence
						c = ( *( i + 1 ) ); // look ahead to next byte in sequence
						if (( c & _lead3 ) == _cont )
						{
							//throw invalid_data( "overlong UTF-8 sequence" );
							return str.size();
						}
					}

				} else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
					contBytes = 4;
					if ( c == _lead4 ) { // possible overlong UTF-8 sequence
						c = ( *( i + 1 ) ); // look ahead to next byte in sequence
						if (( c & _lead4 ) == _cont )
						{
							//throw invalid_data( "overlong UTF-8 sequence" );
							return str.size();
						}
					}

				} else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
					contBytes = 5;
					if ( c == _lead5 ) { // possible overlong UTF-8 sequence
						c = ( *( i + 1 ) ); // look ahead to next byte in sequence
						if (( c & _lead5 ) == _cont )
						{
							//throw invalid_data( "overlong UTF-8 sequence" );
							return str.size();
						}
					}
				}

				// check remaining continuation bytes for
				while ( contBytes-- ) {
					c = ( *( ++i ) ); // get next byte in sequence
					if (( c & ~_cont_mask ) != _cont )
					{
						//throw invalid_data( "bad UTF-8 continuation byte" );
						return str.size();
					}
				}
			}
			length++;
			i++;
		}
		return length;
	}

	void UString::_init()
	{
		m_buffer.mVoidBuffer = nullptr;
		m_bufferType = bt_none;
		m_bufferSize = 0;
	}

	void UString::_cleanBuffer() const
	{
		if ( m_buffer.mVoidBuffer != nullptr ) {
			switch ( m_bufferType ) {
				case bt_string:
					delete m_buffer.mStrBuffer;
					break;
				case bt_wstring:
					delete m_buffer.mWStrBuffer;
					break;
				case bt_utf32string:
					delete m_buffer.mUTF32StrBuffer;
					break;
				case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
					//delete m_buffer.mVoidBuffer;
					// delete void* is undefined, don't do that
					assert("This should never happen - mVoidBuffer should never contain something if we "
						"don't know the type");
					break;
			}
			m_buffer.mVoidBuffer = nullptr;
			m_bufferSize = 0;
			m_bufferType = bt_none;
		}
	}

	void UString::_getBufferStr() const
	{
		if ( m_bufferType != bt_string ) {
			_cleanBuffer();
			m_buffer.mStrBuffer = new std::string();
			m_bufferType = bt_string;
		}
		m_buffer.mStrBuffer->clear();
	}

	void UString::_getBufferWStr() const
	{
		if ( m_bufferType != bt_wstring ) {
			_cleanBuffer();
			m_buffer.mWStrBuffer = new std::wstring();
			m_bufferType = bt_wstring;
		}
		m_buffer.mWStrBuffer->clear();
	}

	void UString::_getBufferUTF32Str() const
	{
		if ( m_bufferType != bt_utf32string ) {
			_cleanBuffer();
			m_buffer.mUTF32StrBuffer = new utf32string();
			m_bufferType = bt_utf32string;
		}
		m_buffer.mUTF32StrBuffer->clear();
	}

	void UString::_load_buffer_UTF8() const
	{
		_getBufferStr();
		std::string& buffer = ( *m_buffer.mStrBuffer );
		buffer.reserve( length() );

		unsigned char utf8buf[6];
		char* charbuf = ( char* )utf8buf;
		unicode_char c;
		size_t len;

		const_iterator i, ie = end();
		for ( i = begin(); i != ie; i.moveNext() ) {
			c = i.getCharacter();
			len = _utf32_to_utf8( c, utf8buf );
			size_t j = 0;
			while ( j < len )
				buffer.push_back( charbuf[j++] );
		}
	}

	void UString::_load_buffer_WStr() const
	{
		_getBufferWStr();
		std::wstring& buffer = ( *m_buffer.mWStrBuffer );
		buffer.reserve( length() ); // may over reserve, but should be close enough
#ifdef WCHAR_UTF16 // wchar_t matches UTF-16
		const_iterator i, ie = end();
		for ( i = begin(); i != ie; ++i ) {
			buffer.push_back(( wchar_t )( *i ) );
		}
#else // wchar_t fits UTF-32
		unicode_char c;
		const_iterator i, ie = end();
		for ( i = begin(); i != ie; i.moveNext() ) {
			c = i.getCharacter();
			buffer.push_back(( wchar_t )c );
		}
#endif
	}

	void UString::_load_buffer_UTF32() const
	{
		_getBufferUTF32Str();
		utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
		buffer.reserve( length() ); // may over reserve, but should be close enough

		unicode_char c;

		const_iterator i, ie = end();
		for ( i = begin(); i != ie; i.moveNext() ) {
			c = i.getCharacter();
			buffer.push_back( c );
		}
	}

} // namespace MyGUI