1 /*=========================================================================
2
3 Program: Visualization Toolkit
4 Module: vtkUnicodeString.cxx
5
6 -------------------------------------------------------------------------
7 Copyright 2008 Sandia Corporation.
8 Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 the U.S. Government retains certain rights in this software.
10 -------------------------------------------------------------------------
11
12 Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
13 All rights reserved.
14 See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
15
16 This software is distributed WITHOUT ANY WARRANTY; without even
17 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
18 PURPOSE. See the above copyright notice for more information.
19
20 =========================================================================*/
21
22 #include "vtkUnicodeString.h"
23
24 #include "vtkObject.h"
25 #include <utf8.h>
26
27 #include <map>
28 #include <stdexcept>
29
30 ///////////////////////////////////////////////////////////////////////////
31 // vtkUnicodeString::const_iterator
32
const_iterator()33 vtkUnicodeString::const_iterator::const_iterator()
34 {
35 }
36
// Wraps a raw position inside a UTF-8 encoded std::string.
//
// The position is stored as given; no validation is performed here.
// NOTE(review): the decoding members use the unchecked vtk_utf8 routines, so
// `position` is presumably expected to sit on the first byte of an encoded
// character (or at end-of-storage) -- confirm against callers.
vtkUnicodeString::const_iterator::const_iterator(std::string::const_iterator position) :
  Position(position)
{
}
41
operator *() const42 vtkUnicodeString::value_type vtkUnicodeString::const_iterator::operator*() const
43 {
44 return vtk_utf8::unchecked::peek_next(this->Position);
45 }
46
operator ==(const const_iterator & rhs) const47 bool vtkUnicodeString::const_iterator::operator==(const const_iterator& rhs) const
48 {
49 return this->Position == rhs.Position;
50 }
51
operator !=(const const_iterator & rhs) const52 bool vtkUnicodeString::const_iterator::operator!=(const const_iterator& rhs) const
53 {
54 return !(*this == rhs);
55 }
56
operator ++()57 vtkUnicodeString::const_iterator& vtkUnicodeString::const_iterator::operator++()
58 {
59 vtk_utf8::unchecked::next(this->Position);
60 return *this;
61 }
62
operator ++(int)63 vtkUnicodeString::const_iterator vtkUnicodeString::const_iterator::operator++(int)
64 {
65 const_iterator result(this->Position);
66 vtk_utf8::unchecked::next(this->Position);
67 return result;
68 }
69
operator --()70 vtkUnicodeString::const_iterator& vtkUnicodeString::const_iterator::operator--()
71 {
72 vtk_utf8::unchecked::prior(this->Position);
73 return *this;
74 }
75
operator --(int)76 vtkUnicodeString::const_iterator vtkUnicodeString::const_iterator::operator--(int)
77 {
78 const_iterator result(this->Position);
79 vtk_utf8::unchecked::prior(this->Position);
80 return result;
81 }
82
83 ///////////////////////////////////////////////////////////////////////////
84 // vtkUnicodeString::back_insert_iterator
85
86 // We provide our own implementation of std::back_insert_iterator for
87 // use with MSVC 6, where push_back() isn't implemented for std::string.
88
89 class vtkUnicodeString::back_insert_iterator
90 {
91 public:
back_insert_iterator(std::string & container)92 back_insert_iterator(std::string& container) :
93 Container(&container)
94 {
95 }
96
operator *()97 back_insert_iterator& operator*()
98 {
99 return *this;
100 }
101
operator ++()102 back_insert_iterator& operator++()
103 {
104 return *this;
105 }
106
operator ++(int)107 back_insert_iterator& operator++(int)
108 {
109 return *this;
110 }
111
operator =(std::string::const_reference value)112 back_insert_iterator& operator=(std::string::const_reference value)
113 {
114 this->Container->push_back(value);
115 return *this;
116 }
117
118 private:
119 std::string* Container;
120 };
121
122 ///////////////////////////////////////////////////////////////////////////
123 // vtkUnicodeString
124
vtkUnicodeString()125 vtkUnicodeString::vtkUnicodeString()
126 {
127 }
128
// Copy constructor: duplicates the UTF-8 storage of `rhs`.
vtkUnicodeString::vtkUnicodeString(const vtkUnicodeString& rhs) :
  Storage(rhs.Storage)
{
}
133
vtkUnicodeString(size_type count,value_type character)134 vtkUnicodeString::vtkUnicodeString(size_type count, value_type character)
135 {
136 for(size_type i = 0; i != count; ++i)
137 vtk_utf8::append(character, vtkUnicodeString::back_insert_iterator(this->Storage));
138 }
139
// Constructs a string from the half-open character range [first, last).
//
// The bytes between the two underlying storage positions are copied
// verbatim; they are not re-validated.
vtkUnicodeString::vtkUnicodeString(const_iterator first, const_iterator last) :
  Storage(first.Position, last.Position)
{
}
144
is_utf8(const char * value)145 bool vtkUnicodeString::is_utf8(const char* value)
146 {
147 return vtkUnicodeString::is_utf8(std::string(value ? value : ""));
148 }
149
is_utf8(const std::string & value)150 bool vtkUnicodeString::is_utf8(const std::string& value)
151 {
152 return vtk_utf8::is_valid(value.begin(), value.end());
153 }
154
from_utf8(const char * value)155 vtkUnicodeString vtkUnicodeString::from_utf8(const char* value)
156 {
157 return vtkUnicodeString::from_utf8(std::string(value ? value : ""));
158 }
159
from_utf8(const char * begin,const char * end)160 vtkUnicodeString vtkUnicodeString::from_utf8(const char* begin, const char* end)
161 {
162 vtkUnicodeString result;
163 if(vtk_utf8::is_valid(begin, end))
164 {
165 result.Storage = std::string(begin, end);
166 }
167 else
168 {
169 vtkGenericWarningMacro("vtkUnicodeString::from_utf8(): not a valid UTF-8 string.");
170 }
171 return result;
172 }
173
from_utf8(const std::string & value)174 vtkUnicodeString vtkUnicodeString::from_utf8(const std::string& value)
175 {
176 vtkUnicodeString result;
177 if(vtk_utf8::is_valid(value.begin(), value.end()))
178 {
179 result.Storage = value;
180 }
181 else
182 {
183 vtkGenericWarningMacro("vtkUnicodeString::from_utf8(): not a valid UTF-8 string.");
184 }
185 return result;
186 }
187
from_utf16(const vtkTypeUInt16 * value)188 vtkUnicodeString vtkUnicodeString::from_utf16(const vtkTypeUInt16* value)
189 {
190 vtkUnicodeString result;
191
192 if(value)
193 {
194 size_type length = 0;
195 while(value[length])
196 ++length;
197
198 try
199 {
200 vtk_utf8::utf16to8(value, value + length, vtkUnicodeString::back_insert_iterator(result.Storage));
201 }
202 catch(vtk_utf8::invalid_utf16&)
203 {
204 vtkGenericWarningMacro(<< "vtkUnicodeString::from_utf16(): not a valid UTF-16 string.");
205 }
206 }
207
208 return result;
209 }
210
// Copy assignment: replaces the contents with a copy of `rhs`.
// Assigning an object to itself leaves it untouched.
vtkUnicodeString& vtkUnicodeString::operator=(const vtkUnicodeString& rhs)
{
  if(this != &rhs)
  {
    this->Storage = rhs.Storage;
  }
  return *this;
}
219
begin() const220 vtkUnicodeString::const_iterator vtkUnicodeString::begin() const
221 {
222 return const_iterator(this->Storage.begin());
223 }
224
end() const225 vtkUnicodeString::const_iterator vtkUnicodeString::end() const
226 {
227 return const_iterator(this->Storage.end());
228 }
229
at(size_type offset) const230 vtkUnicodeString::value_type vtkUnicodeString::at(size_type offset) const
231 {
232 if(offset >= this->character_count())
233 throw std::out_of_range("character out-of-range");
234
235 std::string::const_iterator iterator = this->Storage.begin();
236 vtk_utf8::unchecked::advance(iterator, offset);
237 return vtk_utf8::unchecked::peek_next(iterator);
238 }
239
operator [](size_type offset) const240 vtkUnicodeString::value_type vtkUnicodeString::operator[](size_type offset) const
241 {
242 std::string::const_iterator iterator = this->Storage.begin();
243 vtk_utf8::unchecked::advance(iterator, offset);
244 return vtk_utf8::unchecked::peek_next(iterator);
245 }
246
utf8_str() const247 const char* vtkUnicodeString::utf8_str() const
248 {
249 return this->Storage.c_str();
250 }
251
utf8_str(std::string & result) const252 void vtkUnicodeString::utf8_str(std::string& result) const
253 {
254 result = this->Storage;
255 }
256
utf16_str() const257 std::vector<vtkTypeUInt16> vtkUnicodeString::utf16_str() const
258 {
259 std::vector<vtkTypeUInt16> result;
260 vtk_utf8::unchecked::utf8to16(this->Storage.begin(), this->Storage.end(), std::back_inserter(result));
261 return result;
262 }
263
utf16_str(std::vector<vtkTypeUInt16> & result) const264 void vtkUnicodeString::utf16_str(std::vector<vtkTypeUInt16>& result) const
265 {
266 result.clear();
267 vtk_utf8::unchecked::utf8to16(this->Storage.begin(), this->Storage.end(), std::back_inserter(result));
268 }
269
byte_count() const270 vtkUnicodeString::size_type vtkUnicodeString::byte_count() const
271 {
272 return this->Storage.size();
273 }
274
character_count() const275 vtkUnicodeString::size_type vtkUnicodeString::character_count() const
276 {
277 return vtk_utf8::unchecked::distance(this->Storage.begin(), this->Storage.end());
278 }
279
empty() const280 bool vtkUnicodeString::empty() const
281 {
282 return this->Storage.empty();
283 }
284
// Definition of the static npos constant; it takes its value from
// std::string::npos.
const vtkUnicodeString::size_type vtkUnicodeString::npos = std::string::npos;
286
operator +=(value_type value)287 vtkUnicodeString& vtkUnicodeString::operator+=(value_type value)
288 {
289 this->push_back(value);
290 return *this;
291 }
292
// Appends the contents of `rhs` via append() and returns this string.
vtkUnicodeString& vtkUnicodeString::operator+=(const vtkUnicodeString& rhs)
{
  vtkUnicodeString& self = *this;
  self.append(rhs);
  return self;
}
298
push_back(value_type character)299 void vtkUnicodeString::push_back(value_type character)
300 {
301 try
302 {
303 vtk_utf8::append(character, vtkUnicodeString::back_insert_iterator(this->Storage));
304 }
305 catch(vtk_utf8::invalid_code_point&)
306 {
307 vtkGenericWarningMacro("vtkUnicodeString::push_back(): " << character << "is not a valid Unicode code point");
308 }
309 }
310
append(const vtkUnicodeString & value)311 void vtkUnicodeString::append(const vtkUnicodeString& value)
312 {
313 this->Storage.append(value.Storage);
314 }
315
append(size_type count,value_type character)316 void vtkUnicodeString::append(size_type count, value_type character)
317 {
318 try
319 {
320 this->Storage.append(vtkUnicodeString(count, character).Storage);
321 }
322 catch(vtk_utf8::invalid_code_point&)
323 {
324 vtkGenericWarningMacro("vtkUnicodeString::append(): " << character << "is not a valid Unicode code point");
325 }
326 }
327
append(const_iterator first,const_iterator last)328 void vtkUnicodeString::append(const_iterator first, const_iterator last)
329 {
330 #if defined (__BORLANDC__) && (__BORLANDC__ < 0x0580)
331 this->Storage.append(first.Position, last.Position - first.Position);
332 #else
333 this->Storage.append(first.Position, last.Position);
334 #endif
335 }
336
assign(const vtkUnicodeString & value)337 void vtkUnicodeString::assign(const vtkUnicodeString& value)
338 {
339 this->Storage.assign(value.Storage);
340 }
341
assign(size_type count,value_type character)342 void vtkUnicodeString::assign(size_type count, value_type character)
343 {
344 try
345 {
346 this->Storage.assign(vtkUnicodeString(count, character).Storage);
347 }
348 catch(vtk_utf8::invalid_code_point&)
349 {
350 vtkGenericWarningMacro("vtkUnicodeString::assign(): " << character << "is not a valid Unicode code point");
351 }
352 }
353
assign(const_iterator first,const_iterator last)354 void vtkUnicodeString::assign(const_iterator first, const_iterator last)
355 {
356 #if defined (__BORLANDC__) && (__BORLANDC__ < 0x0580)
357 this->Storage.assign(first.Position, last.Position - first.Position);
358 #else
359 this->Storage.assign(first.Position, last.Position);
360 #endif
361 }
362
// Removes all characters, leaving the string empty.
void vtkUnicodeString::clear()
{
  this->Storage.clear();
}
367
fold_case() const368 vtkUnicodeString vtkUnicodeString::fold_case() const
369 {
370 typedef std::map<value_type, vtkUnicodeString> map_t;
371
372 static map_t map;
373 if(map.empty())
374 {
375 #include "vtkUnicodeCaseFoldData.h"
376
377 for(value_type* i = &vtkUnicodeCaseFoldData[0]; *i; ++i)
378 {
379 const value_type code = *i;
380 vtkUnicodeString mapping;
381 for(++i; *i; ++i)
382 {
383 mapping.push_back(*i);
384 }
385 map.insert(std::make_pair(code, mapping));
386 }
387 }
388
389 vtkUnicodeString result;
390
391 for(vtkUnicodeString::const_iterator source = this->begin(); source != this->end(); ++source)
392 {
393 map_t::const_iterator target = map.find(*source);
394 if(target != map.end())
395 {
396 result.append(target->second);
397 }
398 else
399 {
400 result.push_back(*source);
401 }
402 }
403
404 return result;
405 }
406
compare(const vtkUnicodeString & rhs) const407 int vtkUnicodeString::compare(const vtkUnicodeString& rhs) const
408 {
409 return this->Storage.compare(rhs.Storage);
410 }
411
substr(size_type offset,size_type count) const412 vtkUnicodeString vtkUnicodeString::substr(size_type offset, size_type count) const
413 {
414 std::string::const_iterator from = this->Storage.begin();
415 std::string::const_iterator last = this->Storage.end();
416
417 while(from != last && offset--)
418 vtk_utf8::unchecked::advance(from, 1);
419
420 std::string::const_iterator to = from;
421 while(to != last && count--)
422 vtk_utf8::unchecked::advance(to, 1);
423
424 return vtkUnicodeString(from, to);
425 }
426
swap(vtkUnicodeString & rhs)427 void vtkUnicodeString::swap(vtkUnicodeString& rhs)
428 {
429 std::swap(this->Storage, rhs.Storage);
430 }
431
operator ==(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)432 bool operator==(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
433 {
434 return lhs.compare(rhs) == 0;
435 }
436
operator !=(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)437 bool operator!=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
438 {
439 return lhs.compare(rhs) != 0;
440 }
441
operator <(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)442 bool operator<(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
443 {
444 return lhs.compare(rhs) < 0;
445 }
446
operator <=(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)447 bool operator<=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
448 {
449 return lhs.compare(rhs) <= 0;
450 }
451
operator >=(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)452 bool operator>=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
453 {
454 return lhs.compare(rhs) >= 0;
455 }
456
operator >(const vtkUnicodeString & lhs,const vtkUnicodeString & rhs)457 bool operator>(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs)
458 {
459 return lhs.compare(rhs) > 0;
460 }
461