1 /* Copyright (C) 2002 The gtkmm Development Team
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17 #include <glibmmconfig.h>
18 #include <glibmm/ustring.h>
19 #include <glibmm/convert.h>
20 #include <glibmm/error.h>
21 #include <glibmm/utility.h>
22
23 #include <algorithm>
24 #include <iostream>
25 #include <cstring>
26 #include <stdexcept>
27 #include <utility> // For std::move()
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31
32 // If glibmm is built with Autotools, GLIBMM_SIZEOF_WCHAR_T is not defined and
33 // SIZEOF_WCHAR_T is defined in config.h.
34 // If glibmm is built with Meson, config.h does not exist and
35 // GLIBMM_SIZEOF_WCHAR_T is defined in glibmmconfig.h.
36 #if !defined(SIZEOF_WCHAR_T) && defined(GLIBMM_SIZEOF_WCHAR_T)
37 #define SIZEOF_WCHAR_T GLIBMM_SIZEOF_WCHAR_T
38 #endif
39
40 namespace
41 {
42
43 using Glib::ustring;
44
45 // Little helper to make the conversion from gunichar to UTF-8 a one-liner.
46 //
47 struct UnicharToUtf8
48 {
49 char buf[6];
50 ustring::size_type len;
51
UnicharToUtf8__anonbff8c9220111::UnicharToUtf852 explicit UnicharToUtf8(gunichar uc) : len(g_unichar_to_utf8(uc, buf)) {}
53 };
54
55 // All utf8_*_offset() functions return npos if offset is out of range.
56 // The caller should decide if npos is a valid argument and just marks
57 // the whole string, or if it is not allowed (e.g. for start positions).
58 // In the latter case std::out_of_range should be thrown, but usually
59 // std::string will do that for us.
60
61 // First overload: stop on '\0' character.
62 static ustring::size_type
utf8_byte_offset(const char * str,ustring::size_type offset)63 utf8_byte_offset(const char* str, ustring::size_type offset)
64 {
65 if (offset == ustring::npos)
66 return ustring::npos;
67
68 const char* const utf8_skip = g_utf8_skip;
69 const char* p = str;
70
71 for (; offset != 0; --offset)
72 {
73 const unsigned int c = static_cast<unsigned char>(*p);
74
75 if (c == 0)
76 return ustring::npos;
77
78 p += utf8_skip[c];
79 }
80
81 return (p - str);
82 }
83
84 // Second overload: stop when reaching maxlen.
85 static ustring::size_type
utf8_byte_offset(const char * str,ustring::size_type offset,ustring::size_type maxlen)86 utf8_byte_offset(const char* str, ustring::size_type offset, ustring::size_type maxlen)
87 {
88 if (offset == ustring::npos)
89 return ustring::npos;
90
91 const char* const utf8_skip = g_utf8_skip;
92 const char* const pend = str + maxlen;
93 const char* p = str;
94
95 for (; offset != 0; --offset)
96 {
97 if (p >= pend)
98 return ustring::npos;
99
100 p += utf8_skip[static_cast<unsigned char>(*p)];
101 }
102
103 return (p - str);
104 }
105
106 // Third overload: stop when reaching str.size().
107 //
108 inline ustring::size_type
utf8_byte_offset(const std::string & str,ustring::size_type offset)109 utf8_byte_offset(const std::string& str, ustring::size_type offset)
110 {
111 return utf8_byte_offset(str.data(), offset, str.size());
112 }
113
114 // Takes UTF-8 character offset and count in ci and cn.
115 // Returns the byte offset and count in i and n.
116 //
117 struct Utf8SubstrBounds
118 {
119 ustring::size_type i;
120 ustring::size_type n;
121
Utf8SubstrBounds__anonbff8c9220111::Utf8SubstrBounds122 Utf8SubstrBounds(const std::string& str, ustring::size_type ci, ustring::size_type cn)
123 : i(utf8_byte_offset(str, ci)), n(ustring::npos)
124 {
125 if (i != ustring::npos)
126 n = utf8_byte_offset(str.data() + i, cn, str.size() - i);
127 }
128 };
129
130 // Converts byte offset to UTF-8 character offset.
131 inline ustring::size_type
utf8_char_offset(const std::string & str,ustring::size_type offset)132 utf8_char_offset(const std::string& str, ustring::size_type offset)
133 {
134 if (offset == ustring::npos)
135 return ustring::npos;
136
137 const char* const pdata = str.data();
138 return g_utf8_pointer_to_offset(pdata, pdata + offset);
139 }
140
141 // Helper to implement ustring::find_first_of() and find_first_not_of().
142 // Returns the UTF-8 character offset, or ustring::npos if not found.
143 static ustring::size_type
utf8_find_first_of(const std::string & str,ustring::size_type offset,const char * utf8_match,long utf8_match_size,bool find_not_of)144 utf8_find_first_of(const std::string& str, ustring::size_type offset, const char* utf8_match,
145 long utf8_match_size, bool find_not_of)
146 {
147 const ustring::size_type byte_offset = utf8_byte_offset(str, offset);
148 if (byte_offset == ustring::npos)
149 return ustring::npos;
150
151 long ucs4_match_size = 0;
152 const auto ucs4_match =
153 Glib::make_unique_ptr_gfree(g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size));
154
155 const gunichar* const match_begin = ucs4_match.get();
156 const gunichar* const match_end = match_begin + ucs4_match_size;
157
158 const char* const str_begin = str.data();
159 const char* const str_end = str_begin + str.size();
160
161 for (const char* pstr = str_begin + byte_offset; pstr < str_end; pstr = g_utf8_next_char(pstr))
162 {
163 const gunichar* const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr));
164
165 if ((pfound != match_end) != find_not_of)
166 return offset;
167
168 ++offset;
169 }
170
171 return ustring::npos;
172 }
173
174 // Helper to implement ustring::find_last_of() and find_last_not_of().
175 // Returns the UTF-8 character offset, or ustring::npos if not found.
176 static ustring::size_type
utf8_find_last_of(const std::string & str,ustring::size_type offset,const char * utf8_match,long utf8_match_size,bool find_not_of)177 utf8_find_last_of(const std::string& str, ustring::size_type offset, const char* utf8_match,
178 long utf8_match_size, bool find_not_of)
179 {
180 long ucs4_match_size = 0;
181 const auto ucs4_match =
182 Glib::make_unique_ptr_gfree(g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size));
183
184 const gunichar* const match_begin = ucs4_match.get();
185 const gunichar* const match_end = match_begin + ucs4_match_size;
186
187 const char* const str_begin = str.data();
188 const char* pstr = str_begin;
189
190 // Set pstr one byte beyond the actual start position.
191 const ustring::size_type byte_offset = utf8_byte_offset(str, offset);
192 pstr += (byte_offset < str.size()) ? byte_offset + 1 : str.size();
193
194 while (pstr > str_begin)
195 {
196 // Move to previous character.
197 do
198 --pstr;
199 while ((static_cast<unsigned char>(*pstr) & 0xC0u) == 0x80);
200
201 const gunichar* const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr));
202
203 if ((pfound != match_end) != find_not_of)
204 return g_utf8_pointer_to_offset(str_begin, pstr);
205 }
206
207 return ustring::npos;
208 }
209
210 } // anonymous namespace
211
212 namespace Glib
213 {
214
#ifndef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS
// Out-of-class definition of the static member ustring::npos, used when
// the compiler does not allow it to be initialized inline.
// It has the same value as std::string::npos.
const ustring::size_type ustring::npos = std::string::npos;
#endif
220
221 /*
222 * We need our own version of g_utf8_get_char(), because the std::string
223 * iterator is not necessarily a plain pointer (it's in fact not in GCC's
224 * libstdc++-v3). Copying the UTF-8 data into a temporary buffer isn't an
225 * option since this operation is quite time critical. The implementation
226 * is quite different from g_utf8_get_char() -- both more generic and likely
227 * faster.
228 *
229 * By looking at the first byte of a UTF-8 character one can determine the
230 * number of bytes used. GLib offers the g_utf8_skip[] array for this purpose,
231 * but accessing this global variable would, on IA32 at least, introduce
232 * a function call to fetch the Global Offset Table, plus two levels of
233 * indirection in order to read the value. Even worse, fetching the GOT is
234 * always done right at the start of the function instead of the branch that
235 * actually uses the variable.
236 *
237 * Fortunately, there's a better way to get the byte count. As this table
238 * shows, there's a nice regular pattern in the UTF-8 encoding scheme:
239 *
240 * 0x00000000 - 0x0000007F: 0xxxxxxx
241 * 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx
242 * 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
243 * 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
244 * 0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
245 * 0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
246 *
247 * Except for the single byte case, the number of leading 1-bits equals the
248 * byte count. All that is needed is to shift the first byte to the left
249 * until bit 7 becomes 0. Naturally, doing so requires a loop -- but since
250 * we already have one, no additional cost is introduced. This shifting can
251 * further be combined with the computation of the bitmask needed to eliminate
252 * the leading length bits, thus saving yet another register.
253 *
254 * Note: If you change this code, it is advisable to also review what the
255 * compiler makes of it in the assembler output. Except for some pointless
256 * register moves, the generated code is sufficiently close to the optimum
257 * with GCC 4.1.2 on x86_64.
258 */
// Decodes the UTF-8 character starting at pos and returns its code point.
// pos is taken by value, so the caller's iterator is not advanced.
// NOTE(review): there is no validation here; this presumably relies on the
// data being well-formed UTF-8 -- confirm against callers.
gunichar
get_unichar_from_std_iterator(std::string::const_iterator pos)
{
  unsigned int result = static_cast<unsigned char>(*pos);

  // A cleared top bit means a single-byte character: return it as is.
  if ((result & 0x80) != 0)
  {
    // mask tracks where the next length bit of the lead byte sits inside
    // result. It starts at bit 6 and moves left by 5 per round while result
    // moves left by 6, so each round it lines up with the next lower bit
    // of the original lead byte.
    unsigned int mask = 0x40;

    do
    {
      // Make room for the 6 payload bits of the next byte.
      result <<= 6;
      const unsigned int c = static_cast<unsigned char>(*++pos);
      mask <<= 5;
      // Subtracting 0x80 strips the 10xxxxxx marker of a continuation byte.
      result += c - 0x80;
    } while ((result & mask) != 0); // stop at the first 0 length bit

    // Clear the shifted-up length bits of the lead byte, keeping the payload.
    result &= mask - 1;
  }

  return result;
}
281
282 /**** Glib::ustring ********************************************************/
283
ustring()284 ustring::ustring() : string_()
285 {
286 }
287
// Copy constructor: copies the underlying byte string of other.
ustring::ustring(const ustring& other)
: string_(other.string_)
{
}
291
ustring(ustring && other)292 ustring::ustring(ustring&& other) : string_(std::move(other.string_))
293 {
294 }
295
// Substring constructor: i and n are a character position and a character
// count within src.
ustring::ustring(const ustring& src, ustring::size_type i, ustring::size_type n)
: string_()
{
  // Translate the character-based range into a byte-based one first.
  const Utf8SubstrBounds byte_range(src.string_, i, n);
  string_.assign(src.string_, byte_range.i, byte_range.n);
}
301
ustring(const char * src,ustring::size_type n)302 ustring::ustring(const char* src, ustring::size_type n) : string_(src, utf8_byte_offset(src, n))
303 {
304 }
305
ustring(const char * src)306 ustring::ustring(const char* src) : string_(src)
307 {
308 }
309
ustring(ustring::size_type n,gunichar uc)310 ustring::ustring(ustring::size_type n, gunichar uc) : string_()
311 {
312 if (uc < 0x80)
313 {
314 // Optimize the probably most common case.
315 string_.assign(n, static_cast<char>(uc));
316 }
317 else
318 {
319 const UnicharToUtf8 conv(uc);
320 string_.reserve(n * conv.len);
321
322 for (; n > 0; --n)
323 string_.append(conv.buf, conv.len);
324 }
325 }
326
ustring(ustring::size_type n,char c)327 ustring::ustring(ustring::size_type n, char c) : string_(n, c)
328 {
329 }
330
ustring(const std::string & src)331 ustring::ustring(const std::string& src) : string_(src)
332 {
333 }
334
ustring(std::string && src)335 ustring::ustring(std::string&& src) : string_(std::move(src))
336 {
337 }
338
~ustring()339 ustring::~ustring() noexcept
340 {
341 }
342
343 void
swap(ustring & other)344 ustring::swap(ustring& other)
345 {
346 string_.swap(other.string_);
347 }
348
349 /**** Glib::ustring::operator=() *******************************************/
350
351 ustring&
operator =(const ustring & other)352 ustring::operator=(const ustring& other)
353 {
354 string_ = other.string_;
355 return *this;
356 }
357
358 ustring&
operator =(ustring && other)359 ustring::operator=(ustring&& other)
360 {
361 string_ = std::move(other.string_);
362 return *this;
363 }
364
365 ustring&
operator =(const std::string & src)366 ustring::operator=(const std::string& src)
367 {
368 string_ = src;
369 return *this;
370 }
371
372 ustring&
operator =(std::string && src)373 ustring::operator=(std::string&& src)
374 {
375 string_ = std::move(src);
376 return *this;
377 }
378
379 ustring&
operator =(const char * src)380 ustring::operator=(const char* src)
381 {
382 string_ = src;
383 return *this;
384 }
385
386 ustring&
operator =(gunichar uc)387 ustring::operator=(gunichar uc)
388 {
389 const UnicharToUtf8 conv(uc);
390 string_.assign(conv.buf, conv.len);
391 return *this;
392 }
393
394 ustring&
operator =(char c)395 ustring::operator=(char c)
396 {
397 string_ = c;
398 return *this;
399 }
400
401 /**** Glib::ustring::assign() **********************************************/
402
403 ustring&
assign(const ustring & src)404 ustring::assign(const ustring& src)
405 {
406 string_ = src.string_;
407 return *this;
408 }
409
410 ustring&
assign(ustring && src)411 ustring::assign(ustring&& src)
412 {
413 string_ = std::move(src.string_);
414 return *this;
415 }
416
417 ustring&
assign(const ustring & src,ustring::size_type i,ustring::size_type n)418 ustring::assign(const ustring& src, ustring::size_type i, ustring::size_type n)
419 {
420 const Utf8SubstrBounds bounds(src.string_, i, n);
421 string_.assign(src.string_, bounds.i, bounds.n);
422 return *this;
423 }
424
425 ustring&
assign(const char * src,ustring::size_type n)426 ustring::assign(const char* src, ustring::size_type n)
427 {
428 string_.assign(src, utf8_byte_offset(src, n));
429 return *this;
430 }
431
432 ustring&
assign(const char * src)433 ustring::assign(const char* src)
434 {
435 string_ = src;
436 return *this;
437 }
438
439 ustring&
assign(ustring::size_type n,gunichar uc)440 ustring::assign(ustring::size_type n, gunichar uc)
441 {
442 ustring temp(n, uc);
443 string_.swap(temp.string_);
444 return *this;
445 }
446
447 ustring&
assign(ustring::size_type n,char c)448 ustring::assign(ustring::size_type n, char c)
449 {
450 string_.assign(n, c);
451 return *this;
452 }
453
454 /**** Glib::ustring::operator+=() ******************************************/
455
456 ustring&
operator +=(const ustring & src)457 ustring::operator+=(const ustring& src)
458 {
459 string_ += src.string_;
460 return *this;
461 }
462
463 ustring&
operator +=(const char * src)464 ustring::operator+=(const char* src)
465 {
466 string_ += src;
467 return *this;
468 }
469
470 ustring&
operator +=(gunichar uc)471 ustring::operator+=(gunichar uc)
472 {
473 const UnicharToUtf8 conv(uc);
474 string_.append(conv.buf, conv.len);
475 return *this;
476 }
477
478 ustring&
operator +=(char c)479 ustring::operator+=(char c)
480 {
481 string_ += c;
482 return *this;
483 }
484
485 /**** Glib::ustring::push_back() *******************************************/
486
487 void
push_back(gunichar uc)488 ustring::push_back(gunichar uc)
489 {
490 const UnicharToUtf8 conv(uc);
491 string_.append(conv.buf, conv.len);
492 }
493
494 void
push_back(char c)495 ustring::push_back(char c)
496 {
497 string_ += c;
498 }
499
500 /**** Glib::ustring::append() **********************************************/
501
502 ustring&
append(const ustring & src)503 ustring::append(const ustring& src)
504 {
505 string_ += src.string_;
506 return *this;
507 }
508
509 ustring&
append(const ustring & src,ustring::size_type i,ustring::size_type n)510 ustring::append(const ustring& src, ustring::size_type i, ustring::size_type n)
511 {
512 const Utf8SubstrBounds bounds(src.string_, i, n);
513 string_.append(src.string_, bounds.i, bounds.n);
514 return *this;
515 }
516
517 ustring&
append(const char * src,ustring::size_type n)518 ustring::append(const char* src, ustring::size_type n)
519 {
520 string_.append(src, utf8_byte_offset(src, n));
521 return *this;
522 }
523
524 ustring&
append(const char * src)525 ustring::append(const char* src)
526 {
527 string_ += src;
528 return *this;
529 }
530
531 ustring&
append(ustring::size_type n,gunichar uc)532 ustring::append(ustring::size_type n, gunichar uc)
533 {
534 string_.append(ustring(n, uc).string_);
535 return *this;
536 }
537
538 ustring&
append(ustring::size_type n,char c)539 ustring::append(ustring::size_type n, char c)
540 {
541 string_.append(n, c);
542 return *this;
543 }
544
545 /**** Glib::ustring::insert() **********************************************/
546
547 ustring&
insert(ustring::size_type i,const ustring & src)548 ustring::insert(ustring::size_type i, const ustring& src)
549 {
550 string_.insert(utf8_byte_offset(string_, i), src.string_);
551 return *this;
552 }
553
554 ustring&
insert(ustring::size_type i,const ustring & src,ustring::size_type i2,ustring::size_type n)555 ustring::insert(
556 ustring::size_type i, const ustring& src, ustring::size_type i2, ustring::size_type n)
557 {
558 const Utf8SubstrBounds bounds2(src.string_, i2, n);
559 string_.insert(utf8_byte_offset(string_, i), src.string_, bounds2.i, bounds2.n);
560 return *this;
561 }
562
563 ustring&
insert(ustring::size_type i,const char * src,ustring::size_type n)564 ustring::insert(ustring::size_type i, const char* src, ustring::size_type n)
565 {
566 string_.insert(utf8_byte_offset(string_, i), src, utf8_byte_offset(src, n));
567 return *this;
568 }
569
570 ustring&
insert(ustring::size_type i,const char * src)571 ustring::insert(ustring::size_type i, const char* src)
572 {
573 string_.insert(utf8_byte_offset(string_, i), src);
574 return *this;
575 }
576
577 ustring&
insert(ustring::size_type i,ustring::size_type n,gunichar uc)578 ustring::insert(ustring::size_type i, ustring::size_type n, gunichar uc)
579 {
580 string_.insert(utf8_byte_offset(string_, i), ustring(n, uc).string_);
581 return *this;
582 }
583
584 ustring&
insert(ustring::size_type i,ustring::size_type n,char c)585 ustring::insert(ustring::size_type i, ustring::size_type n, char c)
586 {
587 string_.insert(utf8_byte_offset(string_, i), n, c);
588 return *this;
589 }
590
591 ustring::iterator
insert(ustring::iterator p,gunichar uc)592 ustring::insert(ustring::iterator p, gunichar uc)
593 {
594 const size_type offset = p.base() - string_.begin();
595 const UnicharToUtf8 conv(uc);
596 string_.insert(offset, conv.buf, conv.len);
597 return iterator(string_.begin() + offset);
598 }
599
600 ustring::iterator
insert(ustring::iterator p,char c)601 ustring::insert(ustring::iterator p, char c)
602 {
603 return iterator(string_.insert(p.base(), c));
604 }
605
606 void
insert(ustring::iterator p,ustring::size_type n,gunichar uc)607 ustring::insert(ustring::iterator p, ustring::size_type n, gunichar uc)
608 {
609 string_.insert(p.base() - string_.begin(), ustring(n, uc).string_);
610 }
611
612 void
insert(ustring::iterator p,ustring::size_type n,char c)613 ustring::insert(ustring::iterator p, ustring::size_type n, char c)
614 {
615 string_.insert(p.base(), n, c);
616 }
617
618 /**** Glib::ustring::replace() *********************************************/
619
620 ustring&
replace(ustring::size_type i,ustring::size_type n,const ustring & src)621 ustring::replace(ustring::size_type i, ustring::size_type n, const ustring& src)
622 {
623 const Utf8SubstrBounds bounds(string_, i, n);
624 string_.replace(bounds.i, bounds.n, src.string_);
625 return *this;
626 }
627
628 ustring&
replace(ustring::size_type i,ustring::size_type n,const ustring & src,ustring::size_type i2,ustring::size_type n2)629 ustring::replace(ustring::size_type i, ustring::size_type n, const ustring& src,
630 ustring::size_type i2, ustring::size_type n2)
631 {
632 const Utf8SubstrBounds bounds(string_, i, n);
633 const Utf8SubstrBounds bounds2(src.string_, i2, n2);
634 string_.replace(bounds.i, bounds.n, src.string_, bounds2.i, bounds2.n);
635 return *this;
636 }
637
638 ustring&
replace(ustring::size_type i,ustring::size_type n,const char * src,ustring::size_type n2)639 ustring::replace(ustring::size_type i, ustring::size_type n, const char* src, ustring::size_type n2)
640 {
641 const Utf8SubstrBounds bounds(string_, i, n);
642 string_.replace(bounds.i, bounds.n, src, utf8_byte_offset(src, n2));
643 return *this;
644 }
645
646 ustring&
replace(ustring::size_type i,ustring::size_type n,const char * src)647 ustring::replace(ustring::size_type i, ustring::size_type n, const char* src)
648 {
649 const Utf8SubstrBounds bounds(string_, i, n);
650 string_.replace(bounds.i, bounds.n, src);
651 return *this;
652 }
653
654 ustring&
replace(ustring::size_type i,ustring::size_type n,ustring::size_type n2,gunichar uc)655 ustring::replace(ustring::size_type i, ustring::size_type n, ustring::size_type n2, gunichar uc)
656 {
657 const Utf8SubstrBounds bounds(string_, i, n);
658 string_.replace(bounds.i, bounds.n, ustring(n2, uc).string_);
659 return *this;
660 }
661
662 ustring&
replace(ustring::size_type i,ustring::size_type n,ustring::size_type n2,char c)663 ustring::replace(ustring::size_type i, ustring::size_type n, ustring::size_type n2, char c)
664 {
665 const Utf8SubstrBounds bounds(string_, i, n);
666 string_.replace(bounds.i, bounds.n, n2, c);
667 return *this;
668 }
669
670 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,const ustring & src)671 ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const ustring& src)
672 {
673 string_.replace(pbegin.base(), pend.base(), src.string_);
674 return *this;
675 }
676
677 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,const char * src,ustring::size_type n)678 ustring::replace(
679 ustring::iterator pbegin, ustring::iterator pend, const char* src, ustring::size_type n)
680 {
681 string_.replace(pbegin.base(), pend.base(), src, utf8_byte_offset(src, n));
682 return *this;
683 }
684
685 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,const char * src)686 ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const char* src)
687 {
688 string_.replace(pbegin.base(), pend.base(), src);
689 return *this;
690 }
691
692 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,ustring::size_type n,gunichar uc)693 ustring::replace(
694 ustring::iterator pbegin, ustring::iterator pend, ustring::size_type n, gunichar uc)
695 {
696 string_.replace(pbegin.base(), pend.base(), ustring(n, uc).string_);
697 return *this;
698 }
699
700 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,ustring::size_type n,char c)701 ustring::replace(ustring::iterator pbegin, ustring::iterator pend, ustring::size_type n, char c)
702 {
703 string_.replace(pbegin.base(), pend.base(), n, c);
704 return *this;
705 }
706
707 /**** Glib::ustring::erase() ***********************************************/
708
709 void
clear()710 ustring::clear()
711 {
712 string_.erase();
713 }
714
715 ustring&
erase(ustring::size_type i,ustring::size_type n)716 ustring::erase(ustring::size_type i, ustring::size_type n)
717 {
718 const Utf8SubstrBounds bounds(string_, i, n);
719 string_.erase(bounds.i, bounds.n);
720 return *this;
721 }
722
723 ustring&
erase()724 ustring::erase()
725 {
726 string_.erase();
727 return *this;
728 }
729
730 ustring::iterator
erase(ustring::iterator p)731 ustring::erase(ustring::iterator p)
732 {
733 ustring::iterator iter_end = p;
734 ++iter_end;
735
736 return iterator(string_.erase(p.base(), iter_end.base()));
737 }
738
739 ustring::iterator
erase(ustring::iterator pbegin,ustring::iterator pend)740 ustring::erase(ustring::iterator pbegin, ustring::iterator pend)
741 {
742 return iterator(string_.erase(pbegin.base(), pend.base()));
743 }
744
745 /**** Glib::ustring::compare() *********************************************/
746
747 int
compare(const ustring & rhs) const748 ustring::compare(const ustring& rhs) const
749 {
750 return g_utf8_collate(string_.c_str(), rhs.string_.c_str());
751 }
752
753 int
compare(const char * rhs) const754 ustring::compare(const char* rhs) const
755 {
756 return g_utf8_collate(string_.c_str(), rhs);
757 }
758
759 int
compare(ustring::size_type i,ustring::size_type n,const ustring & rhs) const760 ustring::compare(ustring::size_type i, ustring::size_type n, const ustring& rhs) const
761 {
762 return ustring(*this, i, n).compare(rhs);
763 }
764
765 int
compare(ustring::size_type i,ustring::size_type n,const ustring & rhs,ustring::size_type i2,ustring::size_type n2) const766 ustring::compare(ustring::size_type i, ustring::size_type n, const ustring& rhs,
767 ustring::size_type i2, ustring::size_type n2) const
768 {
769 return ustring(*this, i, n).compare(ustring(rhs, i2, n2));
770 }
771
772 int
compare(ustring::size_type i,ustring::size_type n,const char * rhs,ustring::size_type n2) const773 ustring::compare(
774 ustring::size_type i, ustring::size_type n, const char* rhs, ustring::size_type n2) const
775 {
776 return ustring(*this, i, n).compare(ustring(rhs, n2));
777 }
778
779 int
compare(ustring::size_type i,ustring::size_type n,const char * rhs) const780 ustring::compare(ustring::size_type i, ustring::size_type n, const char* rhs) const
781 {
782 return ustring(*this, i, n).compare(rhs);
783 }
784
785 /**** Glib::ustring -- index access ****************************************/
786
operator [](ustring::size_type i) const787 ustring::value_type ustring::operator[](ustring::size_type i) const
788 {
789 return g_utf8_get_char(g_utf8_offset_to_pointer(string_.data(), i));
790 }
791
792 ustring::value_type
at(ustring::size_type i) const793 ustring::at(ustring::size_type i) const
794 {
795 const size_type byte_offset = utf8_byte_offset(string_, i);
796
797 // Throws std::out_of_range if the index is invalid.
798 return g_utf8_get_char(&string_.at(byte_offset));
799 }
800
801 /**** Glib::ustring -- iterator access *************************************/
802
803 ustring::iterator
begin()804 ustring::begin()
805 {
806 return iterator(string_.begin());
807 }
808
809 ustring::iterator
end()810 ustring::end()
811 {
812 return iterator(string_.end());
813 }
814
815 ustring::const_iterator
begin() const816 ustring::begin() const
817 {
818 return const_iterator(string_.begin());
819 }
820
821 ustring::const_iterator
end() const822 ustring::end() const
823 {
824 return const_iterator(string_.end());
825 }
826
827 ustring::reverse_iterator
rbegin()828 ustring::rbegin()
829 {
830 return reverse_iterator(iterator(string_.end()));
831 }
832
833 ustring::reverse_iterator
rend()834 ustring::rend()
835 {
836 return reverse_iterator(iterator(string_.begin()));
837 }
838
839 ustring::const_reverse_iterator
rbegin() const840 ustring::rbegin() const
841 {
842 return const_reverse_iterator(const_iterator(string_.end()));
843 }
844
845 ustring::const_reverse_iterator
rend() const846 ustring::rend() const
847 {
848 return const_reverse_iterator(const_iterator(string_.begin()));
849 }
850
851 ustring::const_iterator
cbegin() const852 ustring::cbegin() const
853 {
854 return const_iterator(string_.begin());
855 }
856
857 ustring::const_iterator
cend() const858 ustring::cend() const
859 {
860 return const_iterator(string_.end());
861 }
862
863 /**** Glib::ustring::find() ************************************************/
864
865 ustring::size_type
find(const ustring & str,ustring::size_type i) const866 ustring::find(const ustring& str, ustring::size_type i) const
867 {
868 return utf8_char_offset(string_, string_.find(str.string_, utf8_byte_offset(string_, i)));
869 }
870
871 ustring::size_type
find(const char * str,ustring::size_type i,ustring::size_type n) const872 ustring::find(const char* str, ustring::size_type i, ustring::size_type n) const
873 {
874 return utf8_char_offset(
875 string_, string_.find(str, utf8_byte_offset(string_, i), utf8_byte_offset(str, n)));
876 }
877
878 ustring::size_type
find(const char * str,ustring::size_type i) const879 ustring::find(const char* str, ustring::size_type i) const
880 {
881 return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i)));
882 }
883
884 ustring::size_type
find(gunichar uc,ustring::size_type i) const885 ustring::find(gunichar uc, ustring::size_type i) const
886 {
887 const UnicharToUtf8 conv(uc);
888 return utf8_char_offset(string_, string_.find(conv.buf, utf8_byte_offset(string_, i), conv.len));
889 }
890
891 ustring::size_type
find(char c,ustring::size_type i) const892 ustring::find(char c, ustring::size_type i) const
893 {
894 return utf8_char_offset(string_, string_.find(c, utf8_byte_offset(string_, i)));
895 }
896
897 /**** Glib::ustring::rfind() ***********************************************/
898
899 ustring::size_type
rfind(const ustring & str,ustring::size_type i) const900 ustring::rfind(const ustring& str, ustring::size_type i) const
901 {
902 return utf8_char_offset(string_, string_.rfind(str.string_, utf8_byte_offset(string_, i)));
903 }
904
905 ustring::size_type
rfind(const char * str,ustring::size_type i,ustring::size_type n) const906 ustring::rfind(const char* str, ustring::size_type i, ustring::size_type n) const
907 {
908 return utf8_char_offset(
909 string_, string_.rfind(str, utf8_byte_offset(string_, i), utf8_byte_offset(str, n)));
910 }
911
912 ustring::size_type
rfind(const char * str,ustring::size_type i) const913 ustring::rfind(const char* str, ustring::size_type i) const
914 {
915 return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i)));
916 }
917
918 ustring::size_type
rfind(gunichar uc,ustring::size_type i) const919 ustring::rfind(gunichar uc, ustring::size_type i) const
920 {
921 const UnicharToUtf8 conv(uc);
922 return utf8_char_offset(string_, string_.rfind(conv.buf, utf8_byte_offset(string_, i), conv.len));
923 }
924
925 ustring::size_type
rfind(char c,ustring::size_type i) const926 ustring::rfind(char c, ustring::size_type i) const
927 {
928 return utf8_char_offset(string_, string_.rfind(c, utf8_byte_offset(string_, i)));
929 }
930
931 /**** Glib::ustring::find_first_of() ***************************************/
932
933 ustring::size_type
find_first_of(const ustring & match,ustring::size_type i) const934 ustring::find_first_of(const ustring& match, ustring::size_type i) const
935 {
936 return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), false);
937 }
938
939 ustring::size_type
find_first_of(const char * match,ustring::size_type i,ustring::size_type n) const940 ustring::find_first_of(const char* match, ustring::size_type i, ustring::size_type n) const
941 {
942 return utf8_find_first_of(string_, i, match, n, false);
943 }
944
945 ustring::size_type
find_first_of(const char * match,ustring::size_type i) const946 ustring::find_first_of(const char* match, ustring::size_type i) const
947 {
948 return utf8_find_first_of(string_, i, match, -1, false);
949 }
950
951 ustring::size_type
find_first_of(gunichar uc,ustring::size_type i) const952 ustring::find_first_of(gunichar uc, ustring::size_type i) const
953 {
954 return find(uc, i);
955 }
956
957 ustring::size_type
find_first_of(char c,ustring::size_type i) const958 ustring::find_first_of(char c, ustring::size_type i) const
959 {
960 return find(c, i);
961 }
962
963 /**** Glib::ustring::find_last_of() ****************************************/
964
965 ustring::size_type
find_last_of(const ustring & match,ustring::size_type i) const966 ustring::find_last_of(const ustring& match, ustring::size_type i) const
967 {
968 return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), false);
969 }
970
971 ustring::size_type
find_last_of(const char * match,ustring::size_type i,ustring::size_type n) const972 ustring::find_last_of(const char* match, ustring::size_type i, ustring::size_type n) const
973 {
974 return utf8_find_last_of(string_, i, match, n, false);
975 }
976
977 ustring::size_type
find_last_of(const char * match,ustring::size_type i) const978 ustring::find_last_of(const char* match, ustring::size_type i) const
979 {
980 return utf8_find_last_of(string_, i, match, -1, false);
981 }
982
983 ustring::size_type
find_last_of(gunichar uc,ustring::size_type i) const984 ustring::find_last_of(gunichar uc, ustring::size_type i) const
985 {
986 return rfind(uc, i);
987 }
988
989 ustring::size_type
find_last_of(char c,ustring::size_type i) const990 ustring::find_last_of(char c, ustring::size_type i) const
991 {
992 return rfind(c, i);
993 }
994
995 /**** Glib::ustring::find_first_not_of() ***********************************/
996
997 ustring::size_type
find_first_not_of(const ustring & match,ustring::size_type i) const998 ustring::find_first_not_of(const ustring& match, ustring::size_type i) const
999 {
1000 return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), true);
1001 }
1002
1003 ustring::size_type
find_first_not_of(const char * match,ustring::size_type i,ustring::size_type n) const1004 ustring::find_first_not_of(const char* match, ustring::size_type i, ustring::size_type n) const
1005 {
1006 return utf8_find_first_of(string_, i, match, n, true);
1007 }
1008
1009 ustring::size_type
find_first_not_of(const char * match,ustring::size_type i) const1010 ustring::find_first_not_of(const char* match, ustring::size_type i) const
1011 {
1012 return utf8_find_first_of(string_, i, match, -1, true);
1013 }
1014
1015 // Unfortunately, all of the find_*_not_of() methods for single
1016 // characters need their own special implementation.
1017 //
1018 ustring::size_type
find_first_not_of(gunichar uc,ustring::size_type i) const1019 ustring::find_first_not_of(gunichar uc, ustring::size_type i) const
1020 {
1021 const size_type bi = utf8_byte_offset(string_, i);
1022 if (bi != npos)
1023 {
1024 const char* const pbegin = string_.data();
1025 const char* const pend = pbegin + string_.size();
1026
1027 for (const char *p = pbegin + bi; p < pend; p = g_utf8_next_char(p), ++i)
1028 {
1029 if (g_utf8_get_char(p) != uc)
1030 return i;
1031 }
1032 }
1033 return npos;
1034 }
1035
1036 ustring::size_type
find_first_not_of(char c,ustring::size_type i) const1037 ustring::find_first_not_of(char c, ustring::size_type i) const
1038 {
1039 const size_type bi = utf8_byte_offset(string_, i);
1040 if (bi != npos)
1041 {
1042 const char* const pbegin = string_.data();
1043 const char* const pend = pbegin + string_.size();
1044
1045 for (const char *p = pbegin + bi; p < pend; p = g_utf8_next_char(p), ++i)
1046 {
1047 if (*p != c)
1048 return i;
1049 }
1050 }
1051 return npos;
1052 }
1053
1054 /**** Glib::ustring::find_last_not_of() ************************************/
1055
1056 ustring::size_type
find_last_not_of(const ustring & match,ustring::size_type i) const1057 ustring::find_last_not_of(const ustring& match, ustring::size_type i) const
1058 {
1059 return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), true);
1060 }
1061
1062 ustring::size_type
find_last_not_of(const char * match,ustring::size_type i,ustring::size_type n) const1063 ustring::find_last_not_of(const char* match, ustring::size_type i, ustring::size_type n) const
1064 {
1065 return utf8_find_last_of(string_, i, match, n, true);
1066 }
1067
1068 ustring::size_type
find_last_not_of(const char * match,ustring::size_type i) const1069 ustring::find_last_not_of(const char* match, ustring::size_type i) const
1070 {
1071 return utf8_find_last_of(string_, i, match, -1, true);
1072 }
1073
1074 // Unfortunately, all of the find_*_not_of() methods for single
1075 // characters need their own special implementation.
1076 //
1077 ustring::size_type
find_last_not_of(gunichar uc,ustring::size_type i) const1078 ustring::find_last_not_of(gunichar uc, ustring::size_type i) const
1079 {
1080 const char* const pbegin = string_.data();
1081 const char* const pend = pbegin + string_.size();
1082 size_type i_cur = 0;
1083 size_type i_found = npos;
1084
1085 for (const char *p = pbegin; p < pend && i_cur <= i; p = g_utf8_next_char(p), ++i_cur)
1086 {
1087 if (g_utf8_get_char(p) != uc)
1088 i_found = i_cur;
1089 }
1090 return i_found;
1091 }
1092
1093 ustring::size_type
find_last_not_of(char c,ustring::size_type i) const1094 ustring::find_last_not_of(char c, ustring::size_type i) const
1095 {
1096 const char* const pbegin = string_.data();
1097 const char* const pend = pbegin + string_.size();
1098 size_type i_cur = 0;
1099 size_type i_found = npos;
1100
1101 for (const char *p = pbegin; p < pend && i_cur <= i; p = g_utf8_next_char(p), ++i_cur)
1102 {
1103 if (*p != c)
1104 i_found = i_cur;
1105 }
1106 return i_found;
1107 }
1108
1109 /**** Glib::ustring -- get size and resize *********************************/
1110
1111 bool
empty() const1112 ustring::empty() const
1113 {
1114 return string_.empty();
1115 }
1116
1117 ustring::size_type
size() const1118 ustring::size() const
1119 {
1120 const char* const pdata = string_.data();
1121 return g_utf8_pointer_to_offset(pdata, pdata + string_.size());
1122 }
1123
1124 ustring::size_type
length() const1125 ustring::length() const
1126 {
1127 const char* const pdata = string_.data();
1128 return g_utf8_pointer_to_offset(pdata, pdata + string_.size());
1129 }
1130
1131 ustring::size_type
bytes() const1132 ustring::bytes() const
1133 {
1134 return string_.size();
1135 }
1136
1137 ustring::size_type
capacity() const1138 ustring::capacity() const
1139 {
1140 return string_.capacity();
1141 }
1142
1143 ustring::size_type
max_size() const1144 ustring::max_size() const
1145 {
1146 return string_.max_size();
1147 }
1148
1149 void
resize(ustring::size_type n,gunichar uc)1150 ustring::resize(ustring::size_type n, gunichar uc)
1151 {
1152 const size_type size_now = size();
1153 if (n < size_now)
1154 erase(n, npos);
1155 else if (n > size_now)
1156 append(n - size_now, uc);
1157 }
1158
1159 void
resize(ustring::size_type n,char c)1160 ustring::resize(ustring::size_type n, char c)
1161 {
1162 const size_type size_now = size();
1163 if (n < size_now)
1164 erase(n, npos);
1165 else if (n > size_now)
1166 string_.append(n - size_now, c);
1167 }
1168
1169 void
reserve(ustring::size_type n)1170 ustring::reserve(ustring::size_type n)
1171 {
1172 string_.reserve(n);
1173 }
1174
1175 /**** Glib::ustring -- C string access *************************************/
1176
1177 const char*
data() const1178 ustring::data() const
1179 {
1180 return string_.data();
1181 }
1182
1183 const char*
c_str() const1184 ustring::c_str() const
1185 {
1186 return string_.c_str();
1187 }
1188
1189 // Note that copy() requests UTF-8 character offsets as
1190 // parameters, but returns the number of copied bytes.
1191 //
1192 ustring::size_type
copy(char * dest,ustring::size_type n,ustring::size_type i) const1193 ustring::copy(char* dest, ustring::size_type n, ustring::size_type i) const
1194 {
1195 const Utf8SubstrBounds bounds(string_, i, n);
1196 return string_.copy(dest, bounds.n, bounds.i);
1197 }
1198
1199 /**** Glib::ustring -- UTF-8 utilities *************************************/
1200
1201 bool
validate() const1202 ustring::validate() const
1203 {
1204 return (g_utf8_validate(string_.data(), string_.size(), nullptr) != 0);
1205 }
1206
1207 bool
validate(ustring::iterator & first_invalid)1208 ustring::validate(ustring::iterator& first_invalid)
1209 {
1210 const char* const pdata = string_.data();
1211 const char* valid_end = pdata;
1212 const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end);
1213
1214 first_invalid = iterator(string_.begin() + (valid_end - pdata));
1215 return (is_valid != 0);
1216 }
1217
1218 bool
validate(ustring::const_iterator & first_invalid) const1219 ustring::validate(ustring::const_iterator& first_invalid) const
1220 {
1221 const char* const pdata = string_.data();
1222 const char* valid_end = pdata;
1223 const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end);
1224
1225 first_invalid = const_iterator(string_.begin() + (valid_end - pdata));
1226 return (is_valid != 0);
1227 }
1228
1229 ustring
make_valid() const1230 ustring::make_valid() const
1231 {
1232 return convert_return_gchar_ptr_to_ustring(g_utf8_make_valid(string_.data(), string_.size()));
1233 }
1234
1235 bool
is_ascii() const1236 ustring::is_ascii() const
1237 {
1238 const char* p = string_.data();
1239 const char* const pend = p + string_.size();
1240
1241 for (; p != pend; ++p)
1242 {
1243 if ((static_cast<unsigned char>(*p) & 0x80u) != 0)
1244 return false;
1245 }
1246
1247 return true;
1248 }
1249
1250 ustring
normalize(NormalizeMode mode) const1251 ustring::normalize(NormalizeMode mode) const
1252 {
1253 return convert_return_gchar_ptr_to_ustring(
1254 g_utf8_normalize(string_.data(), string_.size(), static_cast<GNormalizeMode>(int(mode))));
1255 }
1256
1257 ustring
uppercase() const1258 ustring::uppercase() const
1259 {
1260 return convert_return_gchar_ptr_to_ustring(g_utf8_strup(string_.data(), string_.size()));
1261 }
1262
1263 ustring
lowercase() const1264 ustring::lowercase() const
1265 {
1266 return convert_return_gchar_ptr_to_ustring(g_utf8_strdown(string_.data(), string_.size()));
1267 }
1268
1269 ustring
casefold() const1270 ustring::casefold() const
1271 {
1272 return convert_return_gchar_ptr_to_ustring(g_utf8_casefold(string_.data(), string_.size()));
1273 }
1274
1275 std::string
collate_key() const1276 ustring::collate_key() const
1277 {
1278 return convert_return_gchar_ptr_to_stdstring(g_utf8_collate_key(string_.data(), string_.size()));
1279 }
1280
1281 std::string
casefold_collate_key() const1282 ustring::casefold_collate_key() const
1283 {
1284 char* const casefold_buf = g_utf8_casefold(string_.data(), string_.size());
1285 char* const key_buf = g_utf8_collate_key(casefold_buf, -1);
1286 g_free(casefold_buf);
1287 return std::string(make_unique_ptr_gfree(key_buf).get());
1288 }
1289
1290 /**** Glib::ustring -- Message formatting **********************************/
1291
1292 // static
1293 ustring
compose_argv(const Glib::ustring & fmt,int argc,const ustring * const * argv)1294 ustring::compose_argv(const Glib::ustring& fmt, int argc, const ustring* const* argv)
1295 {
1296 std::string::size_type result_size = fmt.raw().size();
1297
1298 // Guesstimate the final string size.
1299 for (int i = 0; i < argc; ++i)
1300 result_size += argv[i]->raw().size();
1301
1302 std::string result;
1303 result.reserve(result_size);
1304
1305 const char* const pfmt = fmt.raw().c_str();
1306 const char* start = pfmt;
1307
1308 while (const char* const stop = std::strchr(start, '%'))
1309 {
1310 if (stop[1] == '%')
1311 {
1312 result.append(start, stop - start + 1);
1313 start = stop + 2;
1314 }
1315 else
1316 {
1317 const int index = Ascii::digit_value(stop[1]) - 1;
1318
1319 if (index >= 0 && index < argc)
1320 {
1321 result.append(start, stop - start);
1322 result += argv[index]->raw();
1323 start = stop + 2;
1324 }
1325 else
1326 {
1327 const char* const next = (stop[1] != '\0') ? g_utf8_next_char(stop + 1) : (stop + 1);
1328
1329 // Copy invalid substitutions literally to the output.
1330 result.append(start, next - start);
1331
1332 g_warning("invalid substitution \"%s\" in fmt string \"%s\"",
1333 result.c_str() + result.size() - (next - stop), pfmt);
1334 start = next;
1335 }
1336 }
1337 }
1338
1339 result.append(start, pfmt + fmt.raw().size() - start);
1340
1341 return result;
1342 }
1343
1344 /**** Glib::ustring::SequenceToString **************************************/
1345
SequenceToString(Glib::ustring::iterator pbegin,Glib::ustring::iterator pend)1346 ustring::SequenceToString<Glib::ustring::iterator, gunichar>::SequenceToString(
1347 Glib::ustring::iterator pbegin, Glib::ustring::iterator pend)
1348 : std::string(pbegin.base(), pend.base())
1349 {
1350 }
1351
SequenceToString(Glib::ustring::const_iterator pbegin,Glib::ustring::const_iterator pend)1352 ustring::SequenceToString<Glib::ustring::const_iterator, gunichar>::SequenceToString(
1353 Glib::ustring::const_iterator pbegin, Glib::ustring::const_iterator pend)
1354 : std::string(pbegin.base(), pend.base())
1355 {
1356 }
1357
1358 /**** Glib::ustring::FormatStream ******************************************/
1359
FormatStream()1360 ustring::FormatStream::FormatStream() : stream_()
1361 {
1362 }
1363
~FormatStream()1364 ustring::FormatStream::~FormatStream() noexcept
1365 {
1366 }
1367
1368 ustring
to_string() const1369 ustring::FormatStream::to_string() const
1370 {
1371 GError* error = nullptr;
1372
1373 #ifdef GLIBMM_HAVE_WIDE_STREAM
1374 const std::wstring str = stream_.str();
1375
1376 #if (defined(__STDC_ISO_10646__) || defined(_LIBCPP_VERSION)) && SIZEOF_WCHAR_T == 4
1377 // Avoid going through iconv if wchar_t always contains UCS-4.
1378 glong n_bytes = 0;
1379 const auto buf = make_unique_ptr_gfree(g_ucs4_to_utf8(
1380 reinterpret_cast<const gunichar*>(str.data()), str.size(), nullptr, &n_bytes, &error));
1381 #elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1382 // Avoid going through iconv if wchar_t always contains UTF-16.
1383 glong n_bytes = 0;
1384 const auto buf = make_unique_ptr_gfree(g_utf16_to_utf8(
1385 reinterpret_cast<const gunichar2*>(str.data()), str.size(), nullptr, &n_bytes, &error));
1386 #else
1387 gsize n_bytes = 0;
1388 const auto buf = make_unique_ptr_gfree(g_convert(reinterpret_cast<const char*>(str.data()),
1389 str.size() * sizeof(std::wstring::value_type), "UTF-8", "WCHAR_T", nullptr, &n_bytes, &error));
1390 #endif /* !(__STDC_ISO_10646__ || G_OS_WIN32) */
1391
1392 #else /* !GLIBMM_HAVE_WIDE_STREAM */
1393 const std::string str = stream_.str();
1394
1395 gsize n_bytes = 0;
1396 const auto buf =
1397 make_unique_ptr_gfree(g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error));
1398 #endif /* !GLIBMM_HAVE_WIDE_STREAM */
1399
1400 if (error)
1401 {
1402 Glib::Error::throw_exception(error);
1403 }
1404
1405 return ustring(buf.get(), buf.get() + n_bytes);
1406 }
1407
1408 /**** Glib::ustring -- stream I/O operators ********************************/
1409
1410 std::istream&
operator >>(std::istream & is,Glib::ustring & utf8_string)1411 operator>>(std::istream& is, Glib::ustring& utf8_string)
1412 {
1413 std::string str;
1414 is >> str;
1415
1416 GError* error = nullptr;
1417 gsize n_bytes = 0;
1418 const auto buf =
1419 make_unique_ptr_gfree(g_locale_to_utf8(str.data(), str.size(), nullptr, &n_bytes, &error));
1420
1421 if (error)
1422 {
1423 Glib::Error::throw_exception(error);
1424 }
1425
1426 utf8_string.assign(buf.get(), buf.get() + n_bytes);
1427
1428 return is;
1429 }
1430
1431 std::ostream&
operator <<(std::ostream & os,const Glib::ustring & utf8_string)1432 operator<<(std::ostream& os, const Glib::ustring& utf8_string)
1433 {
1434 GError* error = nullptr;
1435 const auto buf = make_unique_ptr_gfree(g_locale_from_utf8(
1436 utf8_string.raw().data(), utf8_string.raw().size(), nullptr, nullptr, &error));
1437 if (error)
1438 {
1439 Glib::Error::throw_exception(error);
1440 }
1441
1442 // This won't work if the string contains NUL characters. Unfortunately,
1443 // std::ostream::write() ignores format flags, so we cannot use that.
1444 // The only option would be to create a temporary std::string. However,
1445 // even then GCC's libstdc++-v3 prints only the characters up to the first
1446 // NUL. Given this, there doesn't seem much of a point in allowing NUL in
1447 // formatted output. The semantics would be unclear anyway: what's the
1448 // screen width of a NUL?
1449 os << buf.get();
1450
1451 return os;
1452 }
1453
1454 #ifdef GLIBMM_HAVE_WIDE_STREAM
1455
1456 std::wistream&
operator >>(std::wistream & is,ustring & utf8_string)1457 operator>>(std::wistream& is, ustring& utf8_string)
1458 {
1459 GError* error = nullptr;
1460
1461 std::wstring wstr;
1462 is >> wstr;
1463
1464 #if (defined(__STDC_ISO_10646__) || defined(_LIBCPP_VERSION)) && SIZEOF_WCHAR_T == 4
1465 // Avoid going through iconv if wchar_t always contains UCS-4.
1466 glong n_bytes = 0;
1467 const auto buf = make_unique_ptr_gfree(g_ucs4_to_utf8(
1468 reinterpret_cast<const gunichar*>(wstr.data()), wstr.size(), nullptr, &n_bytes, &error));
1469 #elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1470 // Avoid going through iconv if wchar_t always contains UTF-16.
1471 glong n_bytes = 0;
1472 const auto buf = make_unique_ptr_gfree(g_utf16_to_utf8(
1473 reinterpret_cast<const gunichar2*>(wstr.data()), wstr.size(), nullptr, &n_bytes, &error));
1474 #else
1475 gsize n_bytes = 0;
1476 const auto buf = make_unique_ptr_gfree(g_convert(reinterpret_cast<const char*>(wstr.data()),
1477 wstr.size() * sizeof(std::wstring::value_type), "UTF-8", "WCHAR_T", nullptr, &n_bytes, &error));
1478 #endif // !(__STDC_ISO_10646__ || G_OS_WIN32)
1479
1480 if (error)
1481 {
1482 Glib::Error::throw_exception(error);
1483 }
1484
1485 utf8_string.assign(buf.get(), buf.get() + n_bytes);
1486
1487 return is;
1488 }
1489
1490 std::wostream&
operator <<(std::wostream & os,const ustring & utf8_string)1491 operator<<(std::wostream& os, const ustring& utf8_string)
1492 {
1493 GError* error = nullptr;
1494
1495 #if (defined(__STDC_ISO_10646__) || defined(_LIBCPP_VERSION)) && SIZEOF_WCHAR_T == 4
1496 // Avoid going through iconv if wchar_t always contains UCS-4.
1497 const auto buf = make_unique_ptr_gfree(
1498 g_utf8_to_ucs4(utf8_string.raw().data(), utf8_string.raw().size(), nullptr, nullptr, &error));
1499 #elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1500 // Avoid going through iconv if wchar_t always contains UTF-16.
1501 const auto buf = make_unique_ptr_gfree(
1502 g_utf8_to_utf16(utf8_string.raw().data(), utf8_string.raw().size(), nullptr, nullptr, &error));
1503 #else
1504 const auto buf = make_unique_ptr_gfree(g_convert(utf8_string.raw().data(),
1505 utf8_string.raw().size(), "WCHAR_T", "UTF-8", nullptr, nullptr, &error));
1506 #endif // !(__STDC_ISO_10646__ || G_OS_WIN32)
1507
1508 if (error)
1509 {
1510 Glib::Error::throw_exception(error);
1511 }
1512
1513 // This won't work if the string contains NUL characters. Unfortunately,
1514 // std::wostream::write() ignores format flags, so we cannot use that.
1515 // The only option would be to create a temporary std::wstring. However,
1516 // even then GCC's libstdc++-v3 prints only the characters up to the first
1517 // NUL. Given this, there doesn't seem much of a point in allowing NUL in
1518 // formatted output. The semantics would be unclear anyway: what's the
1519 // screen width of a NUL?
1520 os << reinterpret_cast<wchar_t*>(buf.get());
1521
1522 return os;
1523 }
1524
1525 #endif /* GLIBMM_HAVE_WIDE_STREAM */
1526
1527 } // namespace Glib
1528