1 /*=========================================================================
2 
3   Program:   Visualization Toolkit
4   Module:    vtkUnicodeString.cxx
5 
6 -------------------------------------------------------------------------
7   Copyright 2008 Sandia Corporation.
8   Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9   the U.S. Government retains certain rights in this software.
10 -------------------------------------------------------------------------
11 
12   Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
13   All rights reserved.
14   See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
15 
16      This software is distributed WITHOUT ANY WARRANTY; without even
17      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
18      PURPOSE.  See the above copyright notice for more information.
19 
20 =========================================================================*/
21 
22 #include "vtkUnicodeString.h"
23 
24 #include "vtkObject.h"
25 #include <utf8.h>
26 
27 #include <map>
28 #include <stdexcept>
29 
30 ///////////////////////////////////////////////////////////////////////////
31 // vtkUnicodeString::const_iterator
32 
const_iterator()33 vtkUnicodeString::const_iterator::const_iterator()
34 {
35 }
36 
const_iterator(std::string::const_iterator position)37 vtkUnicodeString::const_iterator::const_iterator(std::string::const_iterator position) :
38   Position(position)
39 {
40 }
41 
operator *() const42 vtkUnicodeString::value_type vtkUnicodeString::const_iterator::operator*() const
43 {
44   return vtk_utf8::unchecked::peek_next(this->Position);
45 }
46 
operator ==(const const_iterator & rhs) const47 bool vtkUnicodeString::const_iterator::operator==(const const_iterator& rhs) const
48 {
49   return this->Position == rhs.Position;
50 }
51 
operator !=(const const_iterator & rhs) const52 bool vtkUnicodeString::const_iterator::operator!=(const const_iterator& rhs) const
53 {
54   return !(*this == rhs);
55 }
56 
operator ++()57 vtkUnicodeString::const_iterator& vtkUnicodeString::const_iterator::operator++()
58 {
59   vtk_utf8::unchecked::next(this->Position);
60   return *this;
61 }
62 
operator ++(int)63 vtkUnicodeString::const_iterator vtkUnicodeString::const_iterator::operator++(int)
64 {
65   const_iterator result(this->Position);
66   vtk_utf8::unchecked::next(this->Position);
67   return result;
68 }
69 
operator --()70 vtkUnicodeString::const_iterator& vtkUnicodeString::const_iterator::operator--()
71 {
72   vtk_utf8::unchecked::prior(this->Position);
73   return *this;
74 }
75 
operator --(int)76 vtkUnicodeString::const_iterator vtkUnicodeString::const_iterator::operator--(int)
77 {
78   const_iterator result(this->Position);
79   vtk_utf8::unchecked::prior(this->Position);
80   return result;
81 }
82 
83 ///////////////////////////////////////////////////////////////////////////
84 // vtkUnicodeString::back_insert_iterator
85 
86 // We provide our own implementation of std::back_insert_iterator for
87 // use with MSVC 6, where push_back() isn't implemented for std::string.
88 
89 class vtkUnicodeString::back_insert_iterator
90 {
91 public:
back_insert_iterator(std::string & container)92   back_insert_iterator(std::string& container) :
93     Container(&container)
94   {
95   }
96 
operator *()97   back_insert_iterator& operator*()
98   {
99     return *this;
100   }
101 
operator ++()102   back_insert_iterator& operator++()
103   {
104     return *this;
105   }
106 
operator ++(int)107   back_insert_iterator& operator++(int)
108   {
109     return *this;
110   }
111 
operator =(std::string::const_reference value)112   back_insert_iterator& operator=(std::string::const_reference value)
113   {
114     this->Container->push_back(value);
115     return *this;
116   }
117 
118 private:
119   std::string* Container;
120 };
121 
122 ///////////////////////////////////////////////////////////////////////////
123 // vtkUnicodeString
124 
vtkUnicodeString()125 vtkUnicodeString::vtkUnicodeString()
126 {
127 }
128 
// Copy constructor: duplicates the other string's underlying byte storage.
vtkUnicodeString::vtkUnicodeString(const vtkUnicodeString& rhs)
  : Storage(rhs.Storage)
{
}
133 
vtkUnicodeString(size_type count,value_type character)134 vtkUnicodeString::vtkUnicodeString(size_type count, value_type character)
135 {
136   for(size_type i = 0; i != count; ++i)
137     vtk_utf8::append(character, vtkUnicodeString::back_insert_iterator(this->Storage));
138 }
139 
vtkUnicodeString(const_iterator first,const_iterator last)140 vtkUnicodeString::vtkUnicodeString(const_iterator first, const_iterator last) :
141   Storage(first.Position, last.Position)
142 {
143 }
144 
is_utf8(const char * value)145 bool vtkUnicodeString::is_utf8(const char* value)
146 {
147   return vtkUnicodeString::is_utf8(std::string(value ? value : ""));
148 }
149 
is_utf8(const std::string & value)150 bool vtkUnicodeString::is_utf8(const std::string& value)
151 {
152   return vtk_utf8::is_valid(value.begin(), value.end());
153 }
154 
from_utf8(const char * value)155 vtkUnicodeString vtkUnicodeString::from_utf8(const char* value)
156 {
157   return vtkUnicodeString::from_utf8(std::string(value ? value : ""));
158 }
159 
from_utf8(const char * begin,const char * end)160 vtkUnicodeString vtkUnicodeString::from_utf8(const char* begin, const char* end)
161 {
162   vtkUnicodeString result;
163   if(vtk_utf8::is_valid(begin, end))
164     {
165     result.Storage = std::string(begin, end);
166     }
167   else
168     {
169     vtkGenericWarningMacro("vtkUnicodeString::from_utf8(): not a valid UTF-8 string.");
170     }
171   return result;
172 }
173 
from_utf8(const std::string & value)174 vtkUnicodeString vtkUnicodeString::from_utf8(const std::string& value)
175 {
176   vtkUnicodeString result;
177   if(vtk_utf8::is_valid(value.begin(), value.end()))
178     {
179     result.Storage = value;
180     }
181   else
182     {
183     vtkGenericWarningMacro("vtkUnicodeString::from_utf8(): not a valid UTF-8 string.");
184     }
185   return result;
186 }
187 
from_utf16(const vtkTypeUInt16 * value)188 vtkUnicodeString vtkUnicodeString::from_utf16(const vtkTypeUInt16* value)
189 {
190   vtkUnicodeString result;
191 
192   if(value)
193     {
194     size_type length = 0;
195     while(value[length])
196       ++length;
197 
198     try
199       {
200       vtk_utf8::utf16to8(value, value + length, vtkUnicodeString::back_insert_iterator(result.Storage));
201       }
202     catch(vtk_utf8::invalid_utf16&)
203       {
204       vtkGenericWarningMacro(<< "vtkUnicodeString::from_utf16(): not a valid UTF-16 string.");
205       }
206     }
207 
208   return result;
209 }
210 
// Copy assignment.  Self-assignment is detected and skipped; otherwise the
// other string's storage is copied over this one's.
vtkUnicodeString& vtkUnicodeString::operator=(const vtkUnicodeString& rhs)
{
  if(this != &rhs)
    {
    this->Storage = rhs.Storage;
    }
  return *this;
}
219 
begin() const220 vtkUnicodeString::const_iterator vtkUnicodeString::begin() const
221 {
222   return const_iterator(this->Storage.begin());
223 }
224 
end() const225 vtkUnicodeString::const_iterator vtkUnicodeString::end() const
226 {
227   return const_iterator(this->Storage.end());
228 }
229 
at(size_type offset) const230 vtkUnicodeString::value_type vtkUnicodeString::at(size_type offset) const
231 {
232   if(offset >= this->character_count())
233     throw std::out_of_range("character out-of-range");
234 
235   std::string::const_iterator iterator = this->Storage.begin();
236   vtk_utf8::unchecked::advance(iterator, offset);
237   return vtk_utf8::unchecked::peek_next(iterator);
238 }
239 
operator [](size_type offset) const240 vtkUnicodeString::value_type vtkUnicodeString::operator[](size_type offset) const
241 {
242   std::string::const_iterator iterator = this->Storage.begin();
243   vtk_utf8::unchecked::advance(iterator, offset);
244   return vtk_utf8::unchecked::peek_next(iterator);
245 }
246 
utf8_str() const247 const char* vtkUnicodeString::utf8_str() const
248 {
249   return this->Storage.c_str();
250 }
251 
utf8_str(std::string & result) const252 void vtkUnicodeString::utf8_str(std::string& result) const
253 {
254   result = this->Storage;
255 }
256 
utf16_str() const257 std::vector<vtkTypeUInt16> vtkUnicodeString::utf16_str() const
258 {
259   std::vector<vtkTypeUInt16> result;
260   vtk_utf8::unchecked::utf8to16(this->Storage.begin(), this->Storage.end(), std::back_inserter(result));
261   return result;
262 }
263 
utf16_str(std::vector<vtkTypeUInt16> & result) const264 void vtkUnicodeString::utf16_str(std::vector<vtkTypeUInt16>& result) const
265 {
266   result.clear();
267   vtk_utf8::unchecked::utf8to16(this->Storage.begin(), this->Storage.end(), std::back_inserter(result));
268 }
269 
byte_count() const270 vtkUnicodeString::size_type vtkUnicodeString::byte_count() const
271 {
272   return this->Storage.size();
273 }
274 
character_count() const275 vtkUnicodeString::size_type vtkUnicodeString::character_count() const
276 {
277   return vtk_utf8::unchecked::distance(this->Storage.begin(), this->Storage.end());
278 }
279 
empty() const280 bool vtkUnicodeString::empty() const
281 {
282   return this->Storage.empty();
283 }
284 
285 const vtkUnicodeString::size_type vtkUnicodeString::npos = std::string::npos;
286 
operator +=(value_type value)287 vtkUnicodeString& vtkUnicodeString::operator+=(value_type value)
288 {
289   this->push_back(value);
290   return *this;
291 }
292 
// Appends another string's bytes to the end of this one, then returns this
// string to allow chaining.
vtkUnicodeString& vtkUnicodeString::operator+=(const vtkUnicodeString& rhs)
{
  this->Storage.append(rhs.Storage);
  return *this;
}
298 
push_back(value_type character)299 void vtkUnicodeString::push_back(value_type character)
300 {
301   try
302     {
303     vtk_utf8::append(character, vtkUnicodeString::back_insert_iterator(this->Storage));
304     }
305   catch(vtk_utf8::invalid_code_point&)
306     {
307     vtkGenericWarningMacro("vtkUnicodeString::push_back(): " << character << "is not a valid Unicode code point");
308     }
309 }
310 
append(const vtkUnicodeString & value)311 void vtkUnicodeString::append(const vtkUnicodeString& value)
312 {
313   this->Storage.append(value.Storage);
314 }
315 
append(size_type count,value_type character)316 void vtkUnicodeString::append(size_type count, value_type character)
317 {
318   try
319     {
320     this->Storage.append(vtkUnicodeString(count, character).Storage);
321     }
322   catch(vtk_utf8::invalid_code_point&)
323     {
324     vtkGenericWarningMacro("vtkUnicodeString::append(): " << character << "is not a valid Unicode code point");
325     }
326 }
327 
append(const_iterator first,const_iterator last)328 void vtkUnicodeString::append(const_iterator first, const_iterator last)
329 {
330 #if defined (__BORLANDC__) && (__BORLANDC__ < 0x0580)
331   this->Storage.append(first.Position, last.Position - first.Position);
332 #else
333   this->Storage.append(first.Position, last.Position);
334 #endif
335 }
336 
assign(const vtkUnicodeString & value)337 void vtkUnicodeString::assign(const vtkUnicodeString& value)
338 {
339   this->Storage.assign(value.Storage);
340 }
341 
assign(size_type count,value_type character)342 void vtkUnicodeString::assign(size_type count, value_type character)
343 {
344   try
345     {
346     this->Storage.assign(vtkUnicodeString(count, character).Storage);
347     }
348   catch(vtk_utf8::invalid_code_point&)
349     {
350     vtkGenericWarningMacro("vtkUnicodeString::assign(): " << character << "is not a valid Unicode code point");
351     }
352 }
353 
assign(const_iterator first,const_iterator last)354 void vtkUnicodeString::assign(const_iterator first, const_iterator last)
355 {
356 #if defined (__BORLANDC__) && (__BORLANDC__ < 0x0580)
357   this->Storage.assign(first.Position, last.Position - first.Position);
358 #else
359   this->Storage.assign(first.Position, last.Position);
360 #endif
361 }
362 
clear()363 void vtkUnicodeString::clear()
364 {
365   this->Storage.clear();
366 }
367 
fold_case() const368 vtkUnicodeString vtkUnicodeString::fold_case() const
369 {
370   typedef std::map<value_type, vtkUnicodeString> map_t;
371 
372   static map_t map;
373   if(map.empty())
374     {
375     #include "vtkUnicodeCaseFoldData.h"
376 
377     for(value_type* i = &vtkUnicodeCaseFoldData[0]; *i; ++i)
378       {
379       const value_type code = *i;
380       vtkUnicodeString mapping;
381       for(++i; *i; ++i)
382         {
383         mapping.push_back(*i);
384         }
385       map.insert(std::make_pair(code, mapping));
386       }
387     }
388 
389   vtkUnicodeString result;
390 
391   for(vtkUnicodeString::const_iterator source = this->begin(); source != this->end(); ++source)
392     {
393     map_t::const_iterator target = map.find(*source);
394     if(target != map.end())
395       {
396       result.append(target->second);
397       }
398     else
399       {
400       result.push_back(*source);
401       }
402     }
403 
404   return result;
405 }
406 
compare(const vtkUnicodeString & rhs) const407 int vtkUnicodeString::compare(const vtkUnicodeString& rhs) const
408 {
409   return this->Storage.compare(rhs.Storage);
410 }
411 
substr(size_type offset,size_type count) const412 vtkUnicodeString vtkUnicodeString::substr(size_type offset, size_type count) const
413 {
414   std::string::const_iterator from = this->Storage.begin();
415   std::string::const_iterator last = this->Storage.end();
416 
417   while(from != last && offset--)
418     vtk_utf8::unchecked::advance(from, 1);
419 
420   std::string::const_iterator to = from;
421   while(to != last && count--)
422     vtk_utf8::unchecked::advance(to, 1);
423 
424   return vtkUnicodeString(from, to);
425 }
426 
swap(vtkUnicodeString & rhs)427 void vtkUnicodeString::swap(vtkUnicodeString& rhs)
428 {
429   std::swap(this->Storage, rhs.Storage);
430 }
431 
operator ==(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)432 bool operator==(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
433 {
434   return lhs.compare(rhs) == 0;
435 }
436 
operator !=(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)437 bool operator!=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
438 {
439   return lhs.compare(rhs) != 0;
440 }
441 
operator <(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)442 bool operator<(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
443 {
444   return lhs.compare(rhs) < 0;
445 }
446 
operator <=(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)447 bool operator<=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
448 {
449   return lhs.compare(rhs) <= 0;
450 }
451 
operator >=(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)452 bool operator>=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
453 {
454   return lhs.compare(rhs) >= 0;
455 }
456 
operator >(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)457 bool operator>(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
458 {
459   return lhs.compare(rhs) > 0;
460 }
461