1 /////////////////////////////////////////////////////////////////////////////
2 // Name:        src/common/unichar.cpp
3 // Purpose:     wxUniChar and wxUniCharRef classes
4 // Author:      Vaclav Slavik
5 // Created:     2007-03-19
6 // Copyright:   (c) 2007 REA Elektronik GmbH
7 // Licence:     wxWindows licence
8 ///////////////////////////////////////////////////////////////////////////////
9 
10 // ===========================================================================
11 // headers
12 // ===========================================================================
13 
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
16 
17 
18 #ifndef WX_PRECOMP
19     #include "wx/strconv.h"  // wxConvLibc
20     #include "wx/log.h"
21 #endif
22 
23 #include "wx/unichar.h"
24 #include "wx/string.h"
25 
26 // ===========================================================================
27 // implementation
28 // ===========================================================================
29 
30 // ---------------------------------------------------------------------------
31 // wxUniChar
32 // ---------------------------------------------------------------------------
33 
34 /* static */
FromHi8bit(char c)35 wxUniChar::value_type wxUniChar::FromHi8bit(char c)
36 {
37 #if wxUSE_UTF8_LOCALE_ONLY
38     wxFAIL_MSG( "invalid UTF-8 character" );
39     wxUnusedVar(c);
40 
41     return wxT('?'); // FIXME-UTF8: what to use as failure character?
42 #else
43     char cbuf[2];
44     cbuf[0] = c;
45     cbuf[1] = '\0';
46     wchar_t wbuf[2];
47     if ( wxConvLibc.ToWChar(wbuf, 2, cbuf, 2) != 2 )
48     {
49         wxFAIL_MSG( "invalid multibyte character" );
50         return wxT('?'); // FIXME-UTF8: what to use as failure character?
51     }
52     return wbuf[0];
53 #endif
54 }
55 
56 /* static */
ToHi8bit(wxUniChar::value_type v)57 char wxUniChar::ToHi8bit(wxUniChar::value_type v)
58 {
59     char c;
60     if ( !GetAsHi8bit(v, &c) )
61     {
62         wxFAIL_MSG( "character cannot be converted to single byte" );
63         c = '?'; // FIXME-UTF8: what to use as failure character?
64     }
65 
66     return c;
67 }
68 
69 /* static */
GetAsHi8bit(value_type v,char * c)70 bool wxUniChar::GetAsHi8bit(value_type v, char *c)
71 {
72     wchar_t wbuf[2];
73     wbuf[0] = v;
74     wbuf[1] = L'\0';
75     char cbuf[2];
76     if ( wxConvLibc.FromWChar(cbuf, 2, wbuf, 2) != 2 )
77         return false;
78 
79     *c = cbuf[0];
80     return true;
81 }
82 
83 // ---------------------------------------------------------------------------
84 // wxUniCharRef
85 // ---------------------------------------------------------------------------
86 
87 #if wxUSE_UNICODE_UTF8
UniChar() const88 wxUniChar wxUniCharRef::UniChar() const
89 {
90     return wxStringOperations::DecodeChar(m_pos);
91 }
92 
operator =(const wxUniChar & c)93 wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
94 {
95     wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
96     size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
97     size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);
98 
99     if ( lenNew == lenOld )
100     {
101         // this is the simpler case: if the new value's UTF-8 code has the
102         // same length, we can just replace it:
103 
104         iterator pos(m_pos);
105         for ( size_t i = 0; i < lenNew; ++i, ++pos )
106             *pos = utf[i];
107     }
108     else // length of character encoding in UTF-8 changed
109     {
110         // the worse case is when the new value has either longer or shorter
111         // code -- in that case, we have to use wxStringImpl::replace() and
112         // this invalidates all iterators, so we have to update them too:
113 
114         wxStringImpl& strimpl = m_str.m_impl;
115 
116         int iterDiff = lenNew - lenOld;
117         size_t posIdx = m_pos - strimpl.begin();
118 
119         // compute positions of outstanding iterators for this string after the
120         // replacement is done (there is only a small number of iterators at
121         // any time, so we use an array on the stack to avoid unneeded
122         // allocation):
123         static const size_t STATIC_SIZE = 32;
124         size_t indexes_a[STATIC_SIZE];
125         size_t *indexes = indexes_a;
126         size_t iterNum = 0;
127         wxStringIteratorNode *it;
128         for ( it = m_str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
129         {
130             wxASSERT( it->m_iter || it->m_citer );
131 
132             if ( iterNum == STATIC_SIZE )
133             {
134                 wxLogTrace( wxT("utf8"), wxT("unexpectedly many iterators") );
135 
136                 size_t total = iterNum + 1;
137                 for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
138                     total++;
139                 indexes = new size_t[total];
140                 memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
141             }
142 
143             size_t idx = it->m_iter
144                          ? (*it->m_iter - strimpl.begin())
145                          : (*it->m_citer - strimpl.begin());
146 
147             if ( idx > posIdx )
148                 idx += iterDiff;
149 
150             indexes[iterNum] = idx;
151         }
152 
153         // update the string:
154         strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);
155 
156 #if wxUSE_STRING_POS_CACHE
157         m_str.InvalidateCache();
158 #endif // wxUSE_STRING_POS_CACHE
159 
160         // finally, set the iterators to valid values again (note that this
161         // updates m_pos as well):
162         size_t i;
163         for ( i = 0, it = m_str.m_iterators.ptr; it; it = it->m_next, ++i )
164         {
165             wxASSERT( i < iterNum );
166             wxASSERT( it->m_iter || it->m_citer );
167 
168             if ( it->m_iter )
169                 *it->m_iter = strimpl.begin() + indexes[i];
170             else // it->m_citer
171                 *it->m_citer = strimpl.begin() + indexes[i];
172         }
173 
174         if ( indexes != indexes_a )
175             delete[] indexes;
176     }
177 
178     return *this;
179 }
180 #endif // wxUSE_UNICODE_UTF8
181