1 /* Copyright (C) 2002 The gtkmm Development Team
2  *
3  * This library is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU Lesser General Public
5  * License as published by the Free Software Foundation; either
6  * version 2.1 of the License, or (at your option) any later version.
7  *
8  * This library is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * Lesser General Public License for more details.
12  *
13  * You should have received a copy of the GNU Lesser General Public
14  * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
15  */
16 
17 #include <glibmmconfig.h>
18 #include <glibmm/ustring.h>
19 #include <glibmm/convert.h>
20 #include <glibmm/error.h>
21 #include <glibmm/utility.h>
22 
23 #include <algorithm>
24 #include <iostream>
25 #include <cstring>
26 #include <stdexcept>
27 #include <utility> // For std::move()
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31 
32 // If glibmm is built with Autotools, GLIBMM_SIZEOF_WCHAR_T is not defined and
33 // SIZEOF_WCHAR_T is defined in config.h.
34 // If glibmm is built with Meson, config.h does not exist and
35 // GLIBMM_SIZEOF_WCHAR_T is defined in glibmmconfig.h.
36 #if !defined(SIZEOF_WCHAR_T) && defined(GLIBMM_SIZEOF_WCHAR_T)
37 #define SIZEOF_WCHAR_T GLIBMM_SIZEOF_WCHAR_T
38 #endif
39 
40 namespace
41 {
42 
43 using Glib::ustring;
44 
45 // Little helper to make the conversion from gunichar to UTF-8 a one-liner.
46 //
47 struct UnicharToUtf8
48 {
49   char buf[6];
50   ustring::size_type len;
51 
UnicharToUtf8__anonbff8c9220111::UnicharToUtf852   explicit UnicharToUtf8(gunichar uc) : len(g_unichar_to_utf8(uc, buf)) {}
53 };
54 
55 // All utf8_*_offset() functions return npos if offset is out of range.
56 // The caller should decide if npos is a valid argument and just marks
57 // the whole string, or if it is not allowed (e.g. for start positions).
58 // In the latter case std::out_of_range should be thrown, but usually
59 // std::string will do that for us.
60 
61 // First overload: stop on '\0' character.
62 static ustring::size_type
utf8_byte_offset(const char * str,ustring::size_type offset)63 utf8_byte_offset(const char* str, ustring::size_type offset)
64 {
65   if (offset == ustring::npos)
66     return ustring::npos;
67 
68   const char* const utf8_skip = g_utf8_skip;
69   const char* p = str;
70 
71   for (; offset != 0; --offset)
72   {
73     const unsigned int c = static_cast<unsigned char>(*p);
74 
75     if (c == 0)
76       return ustring::npos;
77 
78     p += utf8_skip[c];
79   }
80 
81   return (p - str);
82 }
83 
84 // Second overload: stop when reaching maxlen.
85 static ustring::size_type
utf8_byte_offset(const char * str,ustring::size_type offset,ustring::size_type maxlen)86 utf8_byte_offset(const char* str, ustring::size_type offset, ustring::size_type maxlen)
87 {
88   if (offset == ustring::npos)
89     return ustring::npos;
90 
91   const char* const utf8_skip = g_utf8_skip;
92   const char* const pend = str + maxlen;
93   const char* p = str;
94 
95   for (; offset != 0; --offset)
96   {
97     if (p >= pend)
98       return ustring::npos;
99 
100     p += utf8_skip[static_cast<unsigned char>(*p)];
101   }
102 
103   return (p - str);
104 }
105 
106 // Third overload: stop when reaching str.size().
107 //
108 inline ustring::size_type
utf8_byte_offset(const std::string & str,ustring::size_type offset)109 utf8_byte_offset(const std::string& str, ustring::size_type offset)
110 {
111   return utf8_byte_offset(str.data(), offset, str.size());
112 }
113 
114 // Takes UTF-8 character offset and count in ci and cn.
115 // Returns the byte offset and count in i and n.
116 //
117 struct Utf8SubstrBounds
118 {
119   ustring::size_type i;
120   ustring::size_type n;
121 
Utf8SubstrBounds__anonbff8c9220111::Utf8SubstrBounds122   Utf8SubstrBounds(const std::string& str, ustring::size_type ci, ustring::size_type cn)
123   : i(utf8_byte_offset(str, ci)), n(ustring::npos)
124   {
125     if (i != ustring::npos)
126       n = utf8_byte_offset(str.data() + i, cn, str.size() - i);
127   }
128 };
129 
130 // Converts byte offset to UTF-8 character offset.
131 inline ustring::size_type
utf8_char_offset(const std::string & str,ustring::size_type offset)132 utf8_char_offset(const std::string& str, ustring::size_type offset)
133 {
134   if (offset == ustring::npos)
135     return ustring::npos;
136 
137   const char* const pdata = str.data();
138   return g_utf8_pointer_to_offset(pdata, pdata + offset);
139 }
140 
141 // Helper to implement ustring::find_first_of() and find_first_not_of().
142 // Returns the UTF-8 character offset, or ustring::npos if not found.
143 static ustring::size_type
utf8_find_first_of(const std::string & str,ustring::size_type offset,const char * utf8_match,long utf8_match_size,bool find_not_of)144 utf8_find_first_of(const std::string& str, ustring::size_type offset, const char* utf8_match,
145   long utf8_match_size, bool find_not_of)
146 {
147   const ustring::size_type byte_offset = utf8_byte_offset(str, offset);
148   if (byte_offset == ustring::npos)
149     return ustring::npos;
150 
151   long ucs4_match_size = 0;
152   const auto ucs4_match =
153     Glib::make_unique_ptr_gfree(g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size));
154 
155   const gunichar* const match_begin = ucs4_match.get();
156   const gunichar* const match_end = match_begin + ucs4_match_size;
157 
158   const char* const str_begin = str.data();
159   const char* const str_end = str_begin + str.size();
160 
161   for (const char* pstr = str_begin + byte_offset; pstr < str_end; pstr = g_utf8_next_char(pstr))
162   {
163     const gunichar* const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr));
164 
165     if ((pfound != match_end) != find_not_of)
166       return offset;
167 
168     ++offset;
169   }
170 
171   return ustring::npos;
172 }
173 
174 // Helper to implement ustring::find_last_of() and find_last_not_of().
175 // Returns the UTF-8 character offset, or ustring::npos if not found.
176 static ustring::size_type
utf8_find_last_of(const std::string & str,ustring::size_type offset,const char * utf8_match,long utf8_match_size,bool find_not_of)177 utf8_find_last_of(const std::string& str, ustring::size_type offset, const char* utf8_match,
178   long utf8_match_size, bool find_not_of)
179 {
180   long ucs4_match_size = 0;
181   const auto ucs4_match =
182     Glib::make_unique_ptr_gfree(g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size));
183 
184   const gunichar* const match_begin = ucs4_match.get();
185   const gunichar* const match_end = match_begin + ucs4_match_size;
186 
187   const char* const str_begin = str.data();
188   const char* pstr = str_begin;
189 
190   // Set pstr one byte beyond the actual start position.
191   const ustring::size_type byte_offset = utf8_byte_offset(str, offset);
192   pstr += (byte_offset < str.size()) ? byte_offset + 1 : str.size();
193 
194   while (pstr > str_begin)
195   {
196     // Move to previous character.
197     do
198       --pstr;
199     while ((static_cast<unsigned char>(*pstr) & 0xC0u) == 0x80);
200 
201     const gunichar* const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr));
202 
203     if ((pfound != match_end) != find_not_of)
204       return g_utf8_pointer_to_offset(str_begin, pstr);
205   }
206 
207   return ustring::npos;
208 }
209 
210 } // anonymous namespace
211 
212 namespace Glib
213 {
214 
215 #ifndef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS
216 // Initialize static member here,
// because the compiler did not allow us to do it inline.
218 const ustring::size_type ustring::npos = std::string::npos;
219 #endif
220 
221 /*
222  * We need our own version of g_utf8_get_char(), because the std::string
223  * iterator is not necessarily a plain pointer (it's in fact not in GCC's
224  * libstdc++-v3).  Copying the UTF-8 data into a temporary buffer isn't an
225  * option since this operation is quite time critical.  The implementation
226  * is quite different from g_utf8_get_char() -- both more generic and likely
227  * faster.
228  *
229  * By looking at the first byte of a UTF-8 character one can determine the
230  * number of bytes used.  GLib offers the g_utf8_skip[] array for this purpose,
231  * but accessing this global variable would, on IA32 at least, introduce
232  * a function call to fetch the Global Offset Table, plus two levels of
233  * indirection in order to read the value.  Even worse, fetching the GOT is
234  * always done right at the start of the function instead of the branch that
235  * actually uses the variable.
236  *
237  * Fortunately, there's a better way to get the byte count.  As this table
238  * shows, there's a nice regular pattern in the UTF-8 encoding scheme:
239  *
240  * 0x00000000 - 0x0000007F: 0xxxxxxx
241  * 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx
242  * 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
243  * 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
244  * 0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
245  * 0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
246  *
247  * Except for the single byte case, the number of leading 1-bits equals the
248  * byte count.  All that is needed is to shift the first byte to the left
249  * until bit 7 becomes 0.  Naturally, doing so requires a loop -- but since
250  * we already have one, no additional cost is introduced.  This shifting can
251  * further be combined with the computation of the bitmask needed to eliminate
252  * the leading length bits, thus saving yet another register.
253  *
254  * Note:  If you change this code, it is advisable to also review what the
255  * compiler makes of it in the assembler output.  Except for some pointless
256  * register moves, the generated code is sufficiently close to the optimum
257  * with GCC 4.1.2 on x86_64.
258  */
259 gunichar
get_unichar_from_std_iterator(std::string::const_iterator pos)260 get_unichar_from_std_iterator(std::string::const_iterator pos)
261 {
262   unsigned int result = static_cast<unsigned char>(*pos);
263 
264   if ((result & 0x80) != 0)
265   {
266     unsigned int mask = 0x40;
267 
268     do
269     {
270       result <<= 6;
271       const unsigned int c = static_cast<unsigned char>(*++pos);
272       mask <<= 5;
273       result += c - 0x80;
274     } while ((result & mask) != 0);
275 
276     result &= mask - 1;
277   }
278 
279   return result;
280 }
281 
282 /**** Glib::ustring ********************************************************/
283 
ustring()284 ustring::ustring() : string_()
285 {
286 }
287 
// Copy constructor: copies the underlying byte string of other.
ustring::ustring(const ustring& other)
: string_(other.string_)
{
}
291 
ustring(ustring && other)292 ustring::ustring(ustring&& other) : string_(std::move(other.string_))
293 {
294 }
295 
// Constructs from a substring of src given as character offset i and
// character count n.
ustring::ustring(const ustring& src, ustring::size_type i, ustring::size_type n)
: string_()
{
  // Character positions have to be mapped to byte positions first.
  const Utf8SubstrBounds byte_range(src.string_, i, n);
  string_.assign(src.string_, byte_range.i, byte_range.n);
}
301 
ustring(const char * src,ustring::size_type n)302 ustring::ustring(const char* src, ustring::size_type n) : string_(src, utf8_byte_offset(src, n))
303 {
304 }
305 
ustring(const char * src)306 ustring::ustring(const char* src) : string_(src)
307 {
308 }
309 
ustring(ustring::size_type n,gunichar uc)310 ustring::ustring(ustring::size_type n, gunichar uc) : string_()
311 {
312   if (uc < 0x80)
313   {
314     // Optimize the probably most common case.
315     string_.assign(n, static_cast<char>(uc));
316   }
317   else
318   {
319     const UnicharToUtf8 conv(uc);
320     string_.reserve(n * conv.len);
321 
322     for (; n > 0; --n)
323       string_.append(conv.buf, conv.len);
324   }
325 }
326 
ustring(ustring::size_type n,char c)327 ustring::ustring(ustring::size_type n, char c) : string_(n, c)
328 {
329 }
330 
ustring(const std::string & src)331 ustring::ustring(const std::string& src) : string_(src)
332 {
333 }
334 
ustring(std::string && src)335 ustring::ustring(std::string&& src) : string_(std::move(src))
336 {
337 }
338 
~ustring()339 ustring::~ustring() noexcept
340 {
341 }
342 
343 void
swap(ustring & other)344 ustring::swap(ustring& other)
345 {
346   string_.swap(other.string_);
347 }
348 
349 /**** Glib::ustring::operator=() *******************************************/
350 
351 ustring&
operator =(const ustring & other)352 ustring::operator=(const ustring& other)
353 {
354   string_ = other.string_;
355   return *this;
356 }
357 
358 ustring&
operator =(ustring && other)359 ustring::operator=(ustring&& other)
360 {
361   string_ = std::move(other.string_);
362   return *this;
363 }
364 
365 ustring&
operator =(const std::string & src)366 ustring::operator=(const std::string& src)
367 {
368   string_ = src;
369   return *this;
370 }
371 
372 ustring&
operator =(std::string && src)373 ustring::operator=(std::string&& src)
374 {
375   string_ = std::move(src);
376   return *this;
377 }
378 
379 ustring&
operator =(const char * src)380 ustring::operator=(const char* src)
381 {
382   string_ = src;
383   return *this;
384 }
385 
386 ustring&
operator =(gunichar uc)387 ustring::operator=(gunichar uc)
388 {
389   const UnicharToUtf8 conv(uc);
390   string_.assign(conv.buf, conv.len);
391   return *this;
392 }
393 
394 ustring&
operator =(char c)395 ustring::operator=(char c)
396 {
397   string_ = c;
398   return *this;
399 }
400 
401 /**** Glib::ustring::assign() **********************************************/
402 
403 ustring&
assign(const ustring & src)404 ustring::assign(const ustring& src)
405 {
406   string_ = src.string_;
407   return *this;
408 }
409 
410 ustring&
assign(ustring && src)411 ustring::assign(ustring&& src)
412 {
413   string_ = std::move(src.string_);
414   return *this;
415 }
416 
417 ustring&
assign(const ustring & src,ustring::size_type i,ustring::size_type n)418 ustring::assign(const ustring& src, ustring::size_type i, ustring::size_type n)
419 {
420   const Utf8SubstrBounds bounds(src.string_, i, n);
421   string_.assign(src.string_, bounds.i, bounds.n);
422   return *this;
423 }
424 
425 ustring&
assign(const char * src,ustring::size_type n)426 ustring::assign(const char* src, ustring::size_type n)
427 {
428   string_.assign(src, utf8_byte_offset(src, n));
429   return *this;
430 }
431 
432 ustring&
assign(const char * src)433 ustring::assign(const char* src)
434 {
435   string_ = src;
436   return *this;
437 }
438 
439 ustring&
assign(ustring::size_type n,gunichar uc)440 ustring::assign(ustring::size_type n, gunichar uc)
441 {
442   ustring temp(n, uc);
443   string_.swap(temp.string_);
444   return *this;
445 }
446 
447 ustring&
assign(ustring::size_type n,char c)448 ustring::assign(ustring::size_type n, char c)
449 {
450   string_.assign(n, c);
451   return *this;
452 }
453 
454 /**** Glib::ustring::operator+=() ******************************************/
455 
456 ustring&
operator +=(const ustring & src)457 ustring::operator+=(const ustring& src)
458 {
459   string_ += src.string_;
460   return *this;
461 }
462 
463 ustring&
operator +=(const char * src)464 ustring::operator+=(const char* src)
465 {
466   string_ += src;
467   return *this;
468 }
469 
470 ustring&
operator +=(gunichar uc)471 ustring::operator+=(gunichar uc)
472 {
473   const UnicharToUtf8 conv(uc);
474   string_.append(conv.buf, conv.len);
475   return *this;
476 }
477 
478 ustring&
operator +=(char c)479 ustring::operator+=(char c)
480 {
481   string_ += c;
482   return *this;
483 }
484 
485 /**** Glib::ustring::push_back() *******************************************/
486 
487 void
push_back(gunichar uc)488 ustring::push_back(gunichar uc)
489 {
490   const UnicharToUtf8 conv(uc);
491   string_.append(conv.buf, conv.len);
492 }
493 
494 void
push_back(char c)495 ustring::push_back(char c)
496 {
497   string_ += c;
498 }
499 
500 /**** Glib::ustring::append() **********************************************/
501 
502 ustring&
append(const ustring & src)503 ustring::append(const ustring& src)
504 {
505   string_ += src.string_;
506   return *this;
507 }
508 
509 ustring&
append(const ustring & src,ustring::size_type i,ustring::size_type n)510 ustring::append(const ustring& src, ustring::size_type i, ustring::size_type n)
511 {
512   const Utf8SubstrBounds bounds(src.string_, i, n);
513   string_.append(src.string_, bounds.i, bounds.n);
514   return *this;
515 }
516 
517 ustring&
append(const char * src,ustring::size_type n)518 ustring::append(const char* src, ustring::size_type n)
519 {
520   string_.append(src, utf8_byte_offset(src, n));
521   return *this;
522 }
523 
524 ustring&
append(const char * src)525 ustring::append(const char* src)
526 {
527   string_ += src;
528   return *this;
529 }
530 
531 ustring&
append(ustring::size_type n,gunichar uc)532 ustring::append(ustring::size_type n, gunichar uc)
533 {
534   string_.append(ustring(n, uc).string_);
535   return *this;
536 }
537 
538 ustring&
append(ustring::size_type n,char c)539 ustring::append(ustring::size_type n, char c)
540 {
541   string_.append(n, c);
542   return *this;
543 }
544 
545 /**** Glib::ustring::insert() **********************************************/
546 
547 ustring&
insert(ustring::size_type i,const ustring & src)548 ustring::insert(ustring::size_type i, const ustring& src)
549 {
550   string_.insert(utf8_byte_offset(string_, i), src.string_);
551   return *this;
552 }
553 
554 ustring&
insert(ustring::size_type i,const ustring & src,ustring::size_type i2,ustring::size_type n)555 ustring::insert(
556   ustring::size_type i, const ustring& src, ustring::size_type i2, ustring::size_type n)
557 {
558   const Utf8SubstrBounds bounds2(src.string_, i2, n);
559   string_.insert(utf8_byte_offset(string_, i), src.string_, bounds2.i, bounds2.n);
560   return *this;
561 }
562 
563 ustring&
insert(ustring::size_type i,const char * src,ustring::size_type n)564 ustring::insert(ustring::size_type i, const char* src, ustring::size_type n)
565 {
566   string_.insert(utf8_byte_offset(string_, i), src, utf8_byte_offset(src, n));
567   return *this;
568 }
569 
570 ustring&
insert(ustring::size_type i,const char * src)571 ustring::insert(ustring::size_type i, const char* src)
572 {
573   string_.insert(utf8_byte_offset(string_, i), src);
574   return *this;
575 }
576 
577 ustring&
insert(ustring::size_type i,ustring::size_type n,gunichar uc)578 ustring::insert(ustring::size_type i, ustring::size_type n, gunichar uc)
579 {
580   string_.insert(utf8_byte_offset(string_, i), ustring(n, uc).string_);
581   return *this;
582 }
583 
584 ustring&
insert(ustring::size_type i,ustring::size_type n,char c)585 ustring::insert(ustring::size_type i, ustring::size_type n, char c)
586 {
587   string_.insert(utf8_byte_offset(string_, i), n, c);
588   return *this;
589 }
590 
591 ustring::iterator
insert(ustring::iterator p,gunichar uc)592 ustring::insert(ustring::iterator p, gunichar uc)
593 {
594   const size_type offset = p.base() - string_.begin();
595   const UnicharToUtf8 conv(uc);
596   string_.insert(offset, conv.buf, conv.len);
597   return iterator(string_.begin() + offset);
598 }
599 
600 ustring::iterator
insert(ustring::iterator p,char c)601 ustring::insert(ustring::iterator p, char c)
602 {
603   return iterator(string_.insert(p.base(), c));
604 }
605 
606 void
insert(ustring::iterator p,ustring::size_type n,gunichar uc)607 ustring::insert(ustring::iterator p, ustring::size_type n, gunichar uc)
608 {
609   string_.insert(p.base() - string_.begin(), ustring(n, uc).string_);
610 }
611 
612 void
insert(ustring::iterator p,ustring::size_type n,char c)613 ustring::insert(ustring::iterator p, ustring::size_type n, char c)
614 {
615   string_.insert(p.base(), n, c);
616 }
617 
618 /**** Glib::ustring::replace() *********************************************/
619 
620 ustring&
replace(ustring::size_type i,ustring::size_type n,const ustring & src)621 ustring::replace(ustring::size_type i, ustring::size_type n, const ustring& src)
622 {
623   const Utf8SubstrBounds bounds(string_, i, n);
624   string_.replace(bounds.i, bounds.n, src.string_);
625   return *this;
626 }
627 
628 ustring&
replace(ustring::size_type i,ustring::size_type n,const ustring & src,ustring::size_type i2,ustring::size_type n2)629 ustring::replace(ustring::size_type i, ustring::size_type n, const ustring& src,
630   ustring::size_type i2, ustring::size_type n2)
631 {
632   const Utf8SubstrBounds bounds(string_, i, n);
633   const Utf8SubstrBounds bounds2(src.string_, i2, n2);
634   string_.replace(bounds.i, bounds.n, src.string_, bounds2.i, bounds2.n);
635   return *this;
636 }
637 
638 ustring&
replace(ustring::size_type i,ustring::size_type n,const char * src,ustring::size_type n2)639 ustring::replace(ustring::size_type i, ustring::size_type n, const char* src, ustring::size_type n2)
640 {
641   const Utf8SubstrBounds bounds(string_, i, n);
642   string_.replace(bounds.i, bounds.n, src, utf8_byte_offset(src, n2));
643   return *this;
644 }
645 
646 ustring&
replace(ustring::size_type i,ustring::size_type n,const char * src)647 ustring::replace(ustring::size_type i, ustring::size_type n, const char* src)
648 {
649   const Utf8SubstrBounds bounds(string_, i, n);
650   string_.replace(bounds.i, bounds.n, src);
651   return *this;
652 }
653 
654 ustring&
replace(ustring::size_type i,ustring::size_type n,ustring::size_type n2,gunichar uc)655 ustring::replace(ustring::size_type i, ustring::size_type n, ustring::size_type n2, gunichar uc)
656 {
657   const Utf8SubstrBounds bounds(string_, i, n);
658   string_.replace(bounds.i, bounds.n, ustring(n2, uc).string_);
659   return *this;
660 }
661 
662 ustring&
replace(ustring::size_type i,ustring::size_type n,ustring::size_type n2,char c)663 ustring::replace(ustring::size_type i, ustring::size_type n, ustring::size_type n2, char c)
664 {
665   const Utf8SubstrBounds bounds(string_, i, n);
666   string_.replace(bounds.i, bounds.n, n2, c);
667   return *this;
668 }
669 
670 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,const ustring & src)671 ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const ustring& src)
672 {
673   string_.replace(pbegin.base(), pend.base(), src.string_);
674   return *this;
675 }
676 
677 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,const char * src,ustring::size_type n)678 ustring::replace(
679   ustring::iterator pbegin, ustring::iterator pend, const char* src, ustring::size_type n)
680 {
681   string_.replace(pbegin.base(), pend.base(), src, utf8_byte_offset(src, n));
682   return *this;
683 }
684 
685 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,const char * src)686 ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const char* src)
687 {
688   string_.replace(pbegin.base(), pend.base(), src);
689   return *this;
690 }
691 
692 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,ustring::size_type n,gunichar uc)693 ustring::replace(
694   ustring::iterator pbegin, ustring::iterator pend, ustring::size_type n, gunichar uc)
695 {
696   string_.replace(pbegin.base(), pend.base(), ustring(n, uc).string_);
697   return *this;
698 }
699 
700 ustring&
replace(ustring::iterator pbegin,ustring::iterator pend,ustring::size_type n,char c)701 ustring::replace(ustring::iterator pbegin, ustring::iterator pend, ustring::size_type n, char c)
702 {
703   string_.replace(pbegin.base(), pend.base(), n, c);
704   return *this;
705 }
706 
707 /**** Glib::ustring::erase() ***********************************************/
708 
709 void
clear()710 ustring::clear()
711 {
712   string_.erase();
713 }
714 
715 ustring&
erase(ustring::size_type i,ustring::size_type n)716 ustring::erase(ustring::size_type i, ustring::size_type n)
717 {
718   const Utf8SubstrBounds bounds(string_, i, n);
719   string_.erase(bounds.i, bounds.n);
720   return *this;
721 }
722 
723 ustring&
erase()724 ustring::erase()
725 {
726   string_.erase();
727   return *this;
728 }
729 
730 ustring::iterator
erase(ustring::iterator p)731 ustring::erase(ustring::iterator p)
732 {
733   ustring::iterator iter_end = p;
734   ++iter_end;
735 
736   return iterator(string_.erase(p.base(), iter_end.base()));
737 }
738 
739 ustring::iterator
erase(ustring::iterator pbegin,ustring::iterator pend)740 ustring::erase(ustring::iterator pbegin, ustring::iterator pend)
741 {
742   return iterator(string_.erase(pbegin.base(), pend.base()));
743 }
744 
745 /**** Glib::ustring::compare() *********************************************/
746 
747 int
compare(const ustring & rhs) const748 ustring::compare(const ustring& rhs) const
749 {
750   return g_utf8_collate(string_.c_str(), rhs.string_.c_str());
751 }
752 
753 int
compare(const char * rhs) const754 ustring::compare(const char* rhs) const
755 {
756   return g_utf8_collate(string_.c_str(), rhs);
757 }
758 
759 int
compare(ustring::size_type i,ustring::size_type n,const ustring & rhs) const760 ustring::compare(ustring::size_type i, ustring::size_type n, const ustring& rhs) const
761 {
762   return ustring(*this, i, n).compare(rhs);
763 }
764 
765 int
compare(ustring::size_type i,ustring::size_type n,const ustring & rhs,ustring::size_type i2,ustring::size_type n2) const766 ustring::compare(ustring::size_type i, ustring::size_type n, const ustring& rhs,
767   ustring::size_type i2, ustring::size_type n2) const
768 {
769   return ustring(*this, i, n).compare(ustring(rhs, i2, n2));
770 }
771 
772 int
compare(ustring::size_type i,ustring::size_type n,const char * rhs,ustring::size_type n2) const773 ustring::compare(
774   ustring::size_type i, ustring::size_type n, const char* rhs, ustring::size_type n2) const
775 {
776   return ustring(*this, i, n).compare(ustring(rhs, n2));
777 }
778 
779 int
compare(ustring::size_type i,ustring::size_type n,const char * rhs) const780 ustring::compare(ustring::size_type i, ustring::size_type n, const char* rhs) const
781 {
782   return ustring(*this, i, n).compare(rhs);
783 }
784 
785 /**** Glib::ustring -- index access ****************************************/
786 
operator [](ustring::size_type i) const787 ustring::value_type ustring::operator[](ustring::size_type i) const
788 {
789   return g_utf8_get_char(g_utf8_offset_to_pointer(string_.data(), i));
790 }
791 
792 ustring::value_type
at(ustring::size_type i) const793 ustring::at(ustring::size_type i) const
794 {
795   const size_type byte_offset = utf8_byte_offset(string_, i);
796 
797   // Throws std::out_of_range if the index is invalid.
798   return g_utf8_get_char(&string_.at(byte_offset));
799 }
800 
801 /**** Glib::ustring -- iterator access *************************************/
802 
803 ustring::iterator
begin()804 ustring::begin()
805 {
806   return iterator(string_.begin());
807 }
808 
809 ustring::iterator
end()810 ustring::end()
811 {
812   return iterator(string_.end());
813 }
814 
815 ustring::const_iterator
begin() const816 ustring::begin() const
817 {
818   return const_iterator(string_.begin());
819 }
820 
821 ustring::const_iterator
end() const822 ustring::end() const
823 {
824   return const_iterator(string_.end());
825 }
826 
827 ustring::reverse_iterator
rbegin()828 ustring::rbegin()
829 {
830   return reverse_iterator(iterator(string_.end()));
831 }
832 
833 ustring::reverse_iterator
rend()834 ustring::rend()
835 {
836   return reverse_iterator(iterator(string_.begin()));
837 }
838 
839 ustring::const_reverse_iterator
rbegin() const840 ustring::rbegin() const
841 {
842   return const_reverse_iterator(const_iterator(string_.end()));
843 }
844 
845 ustring::const_reverse_iterator
rend() const846 ustring::rend() const
847 {
848   return const_reverse_iterator(const_iterator(string_.begin()));
849 }
850 
851 ustring::const_iterator
cbegin() const852 ustring::cbegin() const
853 {
854   return const_iterator(string_.begin());
855 }
856 
857 ustring::const_iterator
cend() const858 ustring::cend() const
859 {
860   return const_iterator(string_.end());
861 }
862 
863 /**** Glib::ustring::find() ************************************************/
864 
865 ustring::size_type
find(const ustring & str,ustring::size_type i) const866 ustring::find(const ustring& str, ustring::size_type i) const
867 {
868   return utf8_char_offset(string_, string_.find(str.string_, utf8_byte_offset(string_, i)));
869 }
870 
871 ustring::size_type
find(const char * str,ustring::size_type i,ustring::size_type n) const872 ustring::find(const char* str, ustring::size_type i, ustring::size_type n) const
873 {
874   return utf8_char_offset(
875     string_, string_.find(str, utf8_byte_offset(string_, i), utf8_byte_offset(str, n)));
876 }
877 
878 ustring::size_type
find(const char * str,ustring::size_type i) const879 ustring::find(const char* str, ustring::size_type i) const
880 {
881   return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i)));
882 }
883 
884 ustring::size_type
find(gunichar uc,ustring::size_type i) const885 ustring::find(gunichar uc, ustring::size_type i) const
886 {
887   const UnicharToUtf8 conv(uc);
888   return utf8_char_offset(string_, string_.find(conv.buf, utf8_byte_offset(string_, i), conv.len));
889 }
890 
891 ustring::size_type
find(char c,ustring::size_type i) const892 ustring::find(char c, ustring::size_type i) const
893 {
894   return utf8_char_offset(string_, string_.find(c, utf8_byte_offset(string_, i)));
895 }
896 
897 /**** Glib::ustring::rfind() ***********************************************/
898 
899 ustring::size_type
rfind(const ustring & str,ustring::size_type i) const900 ustring::rfind(const ustring& str, ustring::size_type i) const
901 {
902   return utf8_char_offset(string_, string_.rfind(str.string_, utf8_byte_offset(string_, i)));
903 }
904 
905 ustring::size_type
rfind(const char * str,ustring::size_type i,ustring::size_type n) const906 ustring::rfind(const char* str, ustring::size_type i, ustring::size_type n) const
907 {
908   return utf8_char_offset(
909     string_, string_.rfind(str, utf8_byte_offset(string_, i), utf8_byte_offset(str, n)));
910 }
911 
912 ustring::size_type
rfind(const char * str,ustring::size_type i) const913 ustring::rfind(const char* str, ustring::size_type i) const
914 {
915   return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i)));
916 }
917 
918 ustring::size_type
rfind(gunichar uc,ustring::size_type i) const919 ustring::rfind(gunichar uc, ustring::size_type i) const
920 {
921   const UnicharToUtf8 conv(uc);
922   return utf8_char_offset(string_, string_.rfind(conv.buf, utf8_byte_offset(string_, i), conv.len));
923 }
924 
925 ustring::size_type
rfind(char c,ustring::size_type i) const926 ustring::rfind(char c, ustring::size_type i) const
927 {
928   return utf8_char_offset(string_, string_.rfind(c, utf8_byte_offset(string_, i)));
929 }
930 
931 /**** Glib::ustring::find_first_of() ***************************************/
932 
933 ustring::size_type
find_first_of(const ustring & match,ustring::size_type i) const934 ustring::find_first_of(const ustring& match, ustring::size_type i) const
935 {
936   return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), false);
937 }
938 
939 ustring::size_type
find_first_of(const char * match,ustring::size_type i,ustring::size_type n) const940 ustring::find_first_of(const char* match, ustring::size_type i, ustring::size_type n) const
941 {
942   return utf8_find_first_of(string_, i, match, n, false);
943 }
944 
945 ustring::size_type
find_first_of(const char * match,ustring::size_type i) const946 ustring::find_first_of(const char* match, ustring::size_type i) const
947 {
948   return utf8_find_first_of(string_, i, match, -1, false);
949 }
950 
951 ustring::size_type
find_first_of(gunichar uc,ustring::size_type i) const952 ustring::find_first_of(gunichar uc, ustring::size_type i) const
953 {
954   return find(uc, i);
955 }
956 
957 ustring::size_type
find_first_of(char c,ustring::size_type i) const958 ustring::find_first_of(char c, ustring::size_type i) const
959 {
960   return find(c, i);
961 }
962 
963 /**** Glib::ustring::find_last_of() ****************************************/
964 
965 ustring::size_type
find_last_of(const ustring & match,ustring::size_type i) const966 ustring::find_last_of(const ustring& match, ustring::size_type i) const
967 {
968   return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), false);
969 }
970 
971 ustring::size_type
find_last_of(const char * match,ustring::size_type i,ustring::size_type n) const972 ustring::find_last_of(const char* match, ustring::size_type i, ustring::size_type n) const
973 {
974   return utf8_find_last_of(string_, i, match, n, false);
975 }
976 
977 ustring::size_type
find_last_of(const char * match,ustring::size_type i) const978 ustring::find_last_of(const char* match, ustring::size_type i) const
979 {
980   return utf8_find_last_of(string_, i, match, -1, false);
981 }
982 
983 ustring::size_type
find_last_of(gunichar uc,ustring::size_type i) const984 ustring::find_last_of(gunichar uc, ustring::size_type i) const
985 {
986   return rfind(uc, i);
987 }
988 
989 ustring::size_type
find_last_of(char c,ustring::size_type i) const990 ustring::find_last_of(char c, ustring::size_type i) const
991 {
992   return rfind(c, i);
993 }
994 
995 /**** Glib::ustring::find_first_not_of() ***********************************/
996 
997 ustring::size_type
find_first_not_of(const ustring & match,ustring::size_type i) const998 ustring::find_first_not_of(const ustring& match, ustring::size_type i) const
999 {
1000   return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), true);
1001 }
1002 
1003 ustring::size_type
find_first_not_of(const char * match,ustring::size_type i,ustring::size_type n) const1004 ustring::find_first_not_of(const char* match, ustring::size_type i, ustring::size_type n) const
1005 {
1006   return utf8_find_first_of(string_, i, match, n, true);
1007 }
1008 
1009 ustring::size_type
find_first_not_of(const char * match,ustring::size_type i) const1010 ustring::find_first_not_of(const char* match, ustring::size_type i) const
1011 {
1012   return utf8_find_first_of(string_, i, match, -1, true);
1013 }
1014 
1015 // Unfortunately, all of the find_*_not_of() methods for single
1016 // characters need their own special implementation.
1017 //
1018 ustring::size_type
find_first_not_of(gunichar uc,ustring::size_type i) const1019 ustring::find_first_not_of(gunichar uc, ustring::size_type i) const
1020 {
1021   const size_type bi = utf8_byte_offset(string_, i);
1022   if (bi != npos)
1023   {
1024     const char* const pbegin = string_.data();
1025     const char* const pend = pbegin + string_.size();
1026 
1027     for (const char *p = pbegin + bi; p < pend; p = g_utf8_next_char(p), ++i)
1028     {
1029       if (g_utf8_get_char(p) != uc)
1030         return i;
1031     }
1032   }
1033   return npos;
1034 }
1035 
1036 ustring::size_type
find_first_not_of(char c,ustring::size_type i) const1037 ustring::find_first_not_of(char c, ustring::size_type i) const
1038 {
1039   const size_type bi = utf8_byte_offset(string_, i);
1040   if (bi != npos)
1041   {
1042     const char* const pbegin = string_.data();
1043     const char* const pend = pbegin + string_.size();
1044 
1045     for (const char *p = pbegin + bi; p < pend; p = g_utf8_next_char(p), ++i)
1046     {
1047       if (*p != c)
1048         return i;
1049     }
1050   }
1051   return npos;
1052 }
1053 
1054 /**** Glib::ustring::find_last_not_of() ************************************/
1055 
1056 ustring::size_type
find_last_not_of(const ustring & match,ustring::size_type i) const1057 ustring::find_last_not_of(const ustring& match, ustring::size_type i) const
1058 {
1059   return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), true);
1060 }
1061 
1062 ustring::size_type
find_last_not_of(const char * match,ustring::size_type i,ustring::size_type n) const1063 ustring::find_last_not_of(const char* match, ustring::size_type i, ustring::size_type n) const
1064 {
1065   return utf8_find_last_of(string_, i, match, n, true);
1066 }
1067 
1068 ustring::size_type
find_last_not_of(const char * match,ustring::size_type i) const1069 ustring::find_last_not_of(const char* match, ustring::size_type i) const
1070 {
1071   return utf8_find_last_of(string_, i, match, -1, true);
1072 }
1073 
1074 // Unfortunately, all of the find_*_not_of() methods for single
1075 // characters need their own special implementation.
1076 //
1077 ustring::size_type
find_last_not_of(gunichar uc,ustring::size_type i) const1078 ustring::find_last_not_of(gunichar uc, ustring::size_type i) const
1079 {
1080   const char* const pbegin = string_.data();
1081   const char* const pend = pbegin + string_.size();
1082   size_type i_cur = 0;
1083   size_type i_found = npos;
1084 
1085   for (const char *p = pbegin; p < pend && i_cur <= i; p = g_utf8_next_char(p), ++i_cur)
1086   {
1087     if (g_utf8_get_char(p) != uc)
1088       i_found = i_cur;
1089   }
1090   return i_found;
1091 }
1092 
1093 ustring::size_type
find_last_not_of(char c,ustring::size_type i) const1094 ustring::find_last_not_of(char c, ustring::size_type i) const
1095 {
1096   const char* const pbegin = string_.data();
1097   const char* const pend = pbegin + string_.size();
1098   size_type i_cur = 0;
1099   size_type i_found = npos;
1100 
1101   for (const char *p = pbegin; p < pend && i_cur <= i; p = g_utf8_next_char(p), ++i_cur)
1102   {
1103     if (*p != c)
1104       i_found = i_cur;
1105   }
1106   return i_found;
1107 }
1108 
1109 /**** Glib::ustring -- get size and resize *********************************/
1110 
1111 bool
empty() const1112 ustring::empty() const
1113 {
1114   return string_.empty();
1115 }
1116 
1117 ustring::size_type
size() const1118 ustring::size() const
1119 {
1120   const char* const pdata = string_.data();
1121   return g_utf8_pointer_to_offset(pdata, pdata + string_.size());
1122 }
1123 
1124 ustring::size_type
length() const1125 ustring::length() const
1126 {
1127   const char* const pdata = string_.data();
1128   return g_utf8_pointer_to_offset(pdata, pdata + string_.size());
1129 }
1130 
1131 ustring::size_type
bytes() const1132 ustring::bytes() const
1133 {
1134   return string_.size();
1135 }
1136 
1137 ustring::size_type
capacity() const1138 ustring::capacity() const
1139 {
1140   return string_.capacity();
1141 }
1142 
1143 ustring::size_type
max_size() const1144 ustring::max_size() const
1145 {
1146   return string_.max_size();
1147 }
1148 
1149 void
resize(ustring::size_type n,gunichar uc)1150 ustring::resize(ustring::size_type n, gunichar uc)
1151 {
1152   const size_type size_now = size();
1153   if (n < size_now)
1154     erase(n, npos);
1155   else if (n > size_now)
1156     append(n - size_now, uc);
1157 }
1158 
1159 void
resize(ustring::size_type n,char c)1160 ustring::resize(ustring::size_type n, char c)
1161 {
1162   const size_type size_now = size();
1163   if (n < size_now)
1164     erase(n, npos);
1165   else if (n > size_now)
1166     string_.append(n - size_now, c);
1167 }
1168 
1169 void
reserve(ustring::size_type n)1170 ustring::reserve(ustring::size_type n)
1171 {
1172   string_.reserve(n);
1173 }
1174 
1175 /**** Glib::ustring -- C string access *************************************/
1176 
1177 const char*
data() const1178 ustring::data() const
1179 {
1180   return string_.data();
1181 }
1182 
1183 const char*
c_str() const1184 ustring::c_str() const
1185 {
1186   return string_.c_str();
1187 }
1188 
1189 // Note that copy() requests UTF-8 character offsets as
1190 // parameters, but returns the number of copied bytes.
1191 //
1192 ustring::size_type
copy(char * dest,ustring::size_type n,ustring::size_type i) const1193 ustring::copy(char* dest, ustring::size_type n, ustring::size_type i) const
1194 {
1195   const Utf8SubstrBounds bounds(string_, i, n);
1196   return string_.copy(dest, bounds.n, bounds.i);
1197 }
1198 
1199 /**** Glib::ustring -- UTF-8 utilities *************************************/
1200 
1201 bool
validate() const1202 ustring::validate() const
1203 {
1204   return (g_utf8_validate(string_.data(), string_.size(), nullptr) != 0);
1205 }
1206 
1207 bool
validate(ustring::iterator & first_invalid)1208 ustring::validate(ustring::iterator& first_invalid)
1209 {
1210   const char* const pdata = string_.data();
1211   const char* valid_end = pdata;
1212   const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end);
1213 
1214   first_invalid = iterator(string_.begin() + (valid_end - pdata));
1215   return (is_valid != 0);
1216 }
1217 
1218 bool
validate(ustring::const_iterator & first_invalid) const1219 ustring::validate(ustring::const_iterator& first_invalid) const
1220 {
1221   const char* const pdata = string_.data();
1222   const char* valid_end = pdata;
1223   const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end);
1224 
1225   first_invalid = const_iterator(string_.begin() + (valid_end - pdata));
1226   return (is_valid != 0);
1227 }
1228 
1229 ustring
make_valid() const1230 ustring::make_valid() const
1231 {
1232   return convert_return_gchar_ptr_to_ustring(g_utf8_make_valid(string_.data(), string_.size()));
1233 }
1234 
1235 bool
is_ascii() const1236 ustring::is_ascii() const
1237 {
1238   const char* p = string_.data();
1239   const char* const pend = p + string_.size();
1240 
1241   for (; p != pend; ++p)
1242   {
1243     if ((static_cast<unsigned char>(*p) & 0x80u) != 0)
1244       return false;
1245   }
1246 
1247   return true;
1248 }
1249 
1250 ustring
normalize(NormalizeMode mode) const1251 ustring::normalize(NormalizeMode mode) const
1252 {
1253   return convert_return_gchar_ptr_to_ustring(
1254     g_utf8_normalize(string_.data(), string_.size(), static_cast<GNormalizeMode>(int(mode))));
1255 }
1256 
1257 ustring
uppercase() const1258 ustring::uppercase() const
1259 {
1260   return convert_return_gchar_ptr_to_ustring(g_utf8_strup(string_.data(), string_.size()));
1261 }
1262 
1263 ustring
lowercase() const1264 ustring::lowercase() const
1265 {
1266   return convert_return_gchar_ptr_to_ustring(g_utf8_strdown(string_.data(), string_.size()));
1267 }
1268 
1269 ustring
casefold() const1270 ustring::casefold() const
1271 {
1272   return convert_return_gchar_ptr_to_ustring(g_utf8_casefold(string_.data(), string_.size()));
1273 }
1274 
1275 std::string
collate_key() const1276 ustring::collate_key() const
1277 {
1278   return convert_return_gchar_ptr_to_stdstring(g_utf8_collate_key(string_.data(), string_.size()));
1279 }
1280 
1281 std::string
casefold_collate_key() const1282 ustring::casefold_collate_key() const
1283 {
1284   char* const casefold_buf = g_utf8_casefold(string_.data(), string_.size());
1285   char* const key_buf = g_utf8_collate_key(casefold_buf, -1);
1286   g_free(casefold_buf);
1287   return std::string(make_unique_ptr_gfree(key_buf).get());
1288 }
1289 
1290 /**** Glib::ustring -- Message formatting **********************************/
1291 
// static
//
// Builds a string from the template fmt and the argc strings in argv.
// In fmt, "%%" produces a single '%', and '%' followed by a character whose
// Ascii::digit_value() is k with 1 <= k <= argc is replaced by *argv[k - 1].
// Any other '%' sequence is copied to the output unchanged and reported with
// g_warning().
ustring
ustring::compose_argv(const Glib::ustring& fmt, int argc, const ustring* const* argv)
{
  std::string::size_type result_size = fmt.raw().size();

  // Guesstimate the final string size.
  for (int i = 0; i < argc; ++i)
    result_size += argv[i]->raw().size();

  std::string result;
  result.reserve(result_size);

  const char* const pfmt = fmt.raw().c_str();
  // start marks the beginning of the text not yet copied to result.
  const char* start = pfmt;

  while (const char* const stop = std::strchr(start, '%'))
  {
    if (stop[1] == '%')
    {
      // Copy the pending text including the first '%', then skip both.
      result.append(start, stop - start + 1);
      start = stop + 2;
    }
    else
    {
      // Placeholders are 1-based; argv is 0-based.
      const int index = Ascii::digit_value(stop[1]) - 1;

      if (index >= 0 && index < argc)
      {
        result.append(start, stop - start);
        result += argv[index]->raw();
        start = stop + 2;
      }
      else
      {
        // Step over the whole UTF-8 character after '%', unless the '%' is
        // the last byte before the terminating NUL.
        const char* const next = (stop[1] != '\0') ? g_utf8_next_char(stop + 1) : (stop + 1);

        // Copy invalid substitutions literally to the output.
        result.append(start, next - start);

        // The first %s argument points at the just-copied sequence, which
        // sits at the very end of result.
        g_warning("invalid substitution \"%s\" in fmt string \"%s\"",
          result.c_str() + result.size() - (next - stop), pfmt);
        start = next;
      }
    }
  }

  // Copy whatever follows the last '%' sequence, measured against the full
  // byte size of fmt.
  result.append(start, pfmt + fmt.raw().size() - start);

  return result;
}
1343 
1344 /**** Glib::ustring::SequenceToString **************************************/
1345 
// Specialization for ustring::iterator: initializes the std::string base
// from the range between the two iterators' base() positions.
ustring::SequenceToString<Glib::ustring::iterator, gunichar>::SequenceToString(
  Glib::ustring::iterator pbegin, Glib::ustring::iterator pend)
: std::string(pbegin.base(), pend.base())
{
}
1351 
// Specialization for ustring::const_iterator: initializes the std::string
// base from the range between the two iterators' base() positions.
ustring::SequenceToString<Glib::ustring::const_iterator, gunichar>::SequenceToString(
  Glib::ustring::const_iterator pbegin, Glib::ustring::const_iterator pend)
: std::string(pbegin.base(), pend.base())
{
}
1357 
1358 /**** Glib::ustring::FormatStream ******************************************/
1359 
// Value-initializes the internal stream; no other setup is performed.
ustring::FormatStream::FormatStream() : stream_()
{
}
1363 
// Empty destructor body; nothing is released explicitly here.
ustring::FormatStream::~FormatStream() noexcept
{
}
1367 
1368 ustring
to_string() const1369 ustring::FormatStream::to_string() const
1370 {
1371   GError* error = nullptr;
1372 
1373 #ifdef GLIBMM_HAVE_WIDE_STREAM
1374   const std::wstring str = stream_.str();
1375 
1376 #if (defined(__STDC_ISO_10646__) || defined(_LIBCPP_VERSION)) && SIZEOF_WCHAR_T == 4
1377   // Avoid going through iconv if wchar_t always contains UCS-4.
1378   glong n_bytes = 0;
1379   const auto buf = make_unique_ptr_gfree(g_ucs4_to_utf8(
1380     reinterpret_cast<const gunichar*>(str.data()), str.size(), nullptr, &n_bytes, &error));
1381 #elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1382   // Avoid going through iconv if wchar_t always contains UTF-16.
1383   glong n_bytes = 0;
1384   const auto buf = make_unique_ptr_gfree(g_utf16_to_utf8(
1385     reinterpret_cast<const gunichar2*>(str.data()), str.size(), nullptr, &n_bytes, &error));
1386 #else
1387   gsize n_bytes = 0;
1388   const auto buf = make_unique_ptr_gfree(g_convert(reinterpret_cast<const char*>(str.data()),
1389     str.size() * sizeof(std::wstring::value_type), "UTF-8", "WCHAR_T", nullptr, &n_bytes, &error));
1390 #endif /* !(__STDC_ISO_10646__ || G_OS_WIN32) */
1391 
1392 #else /* !GLIBMM_HAVE_WIDE_STREAM */
1393   const std::string str = stream_.str();
1394 
1395   gsize n_bytes = 0;
1396   const auto buf =
1397     make_unique_ptr_gfree(g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error));
1398 #endif /* !GLIBMM_HAVE_WIDE_STREAM */
1399 
1400   if (error)
1401   {
1402     Glib::Error::throw_exception(error);
1403   }
1404 
1405   return ustring(buf.get(), buf.get() + n_bytes);
1406 }
1407 
1408 /**** Glib::ustring -- stream I/O operators ********************************/
1409 
1410 std::istream&
operator >>(std::istream & is,Glib::ustring & utf8_string)1411 operator>>(std::istream& is, Glib::ustring& utf8_string)
1412 {
1413   std::string str;
1414   is >> str;
1415 
1416   GError* error = nullptr;
1417   gsize n_bytes = 0;
1418   const auto buf =
1419     make_unique_ptr_gfree(g_locale_to_utf8(str.data(), str.size(), nullptr, &n_bytes, &error));
1420 
1421   if (error)
1422   {
1423     Glib::Error::throw_exception(error);
1424   }
1425 
1426   utf8_string.assign(buf.get(), buf.get() + n_bytes);
1427 
1428   return is;
1429 }
1430 
1431 std::ostream&
operator <<(std::ostream & os,const Glib::ustring & utf8_string)1432 operator<<(std::ostream& os, const Glib::ustring& utf8_string)
1433 {
1434   GError* error = nullptr;
1435   const auto buf = make_unique_ptr_gfree(g_locale_from_utf8(
1436     utf8_string.raw().data(), utf8_string.raw().size(), nullptr, nullptr, &error));
1437   if (error)
1438   {
1439     Glib::Error::throw_exception(error);
1440   }
1441 
1442   // This won't work if the string contains NUL characters.  Unfortunately,
1443   // std::ostream::write() ignores format flags, so we cannot use that.
1444   // The only option would be to create a temporary std::string.  However,
1445   // even then GCC's libstdc++-v3 prints only the characters up to the first
1446   // NUL.  Given this, there doesn't seem much of a point in allowing NUL in
1447   // formatted output.  The semantics would be unclear anyway: what's the
1448   // screen width of a NUL?
1449   os << buf.get();
1450 
1451   return os;
1452 }
1453 
1454 #ifdef GLIBMM_HAVE_WIDE_STREAM
1455 
1456 std::wistream&
operator >>(std::wistream & is,ustring & utf8_string)1457 operator>>(std::wistream& is, ustring& utf8_string)
1458 {
1459   GError* error = nullptr;
1460 
1461   std::wstring wstr;
1462   is >> wstr;
1463 
1464 #if (defined(__STDC_ISO_10646__) || defined(_LIBCPP_VERSION)) && SIZEOF_WCHAR_T == 4
1465   // Avoid going through iconv if wchar_t always contains UCS-4.
1466   glong n_bytes = 0;
1467   const auto buf = make_unique_ptr_gfree(g_ucs4_to_utf8(
1468     reinterpret_cast<const gunichar*>(wstr.data()), wstr.size(), nullptr, &n_bytes, &error));
1469 #elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1470   // Avoid going through iconv if wchar_t always contains UTF-16.
1471   glong n_bytes = 0;
1472   const auto buf = make_unique_ptr_gfree(g_utf16_to_utf8(
1473     reinterpret_cast<const gunichar2*>(wstr.data()), wstr.size(), nullptr, &n_bytes, &error));
1474 #else
1475   gsize n_bytes = 0;
1476   const auto buf = make_unique_ptr_gfree(g_convert(reinterpret_cast<const char*>(wstr.data()),
1477     wstr.size() * sizeof(std::wstring::value_type), "UTF-8", "WCHAR_T", nullptr, &n_bytes, &error));
1478 #endif // !(__STDC_ISO_10646__ || G_OS_WIN32)
1479 
1480   if (error)
1481   {
1482     Glib::Error::throw_exception(error);
1483   }
1484 
1485   utf8_string.assign(buf.get(), buf.get() + n_bytes);
1486 
1487   return is;
1488 }
1489 
1490 std::wostream&
operator <<(std::wostream & os,const ustring & utf8_string)1491 operator<<(std::wostream& os, const ustring& utf8_string)
1492 {
1493   GError* error = nullptr;
1494 
1495 #if (defined(__STDC_ISO_10646__) || defined(_LIBCPP_VERSION)) && SIZEOF_WCHAR_T == 4
1496   // Avoid going through iconv if wchar_t always contains UCS-4.
1497   const auto buf = make_unique_ptr_gfree(
1498     g_utf8_to_ucs4(utf8_string.raw().data(), utf8_string.raw().size(), nullptr, nullptr, &error));
1499 #elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1500   // Avoid going through iconv if wchar_t always contains UTF-16.
1501   const auto buf = make_unique_ptr_gfree(
1502     g_utf8_to_utf16(utf8_string.raw().data(), utf8_string.raw().size(), nullptr, nullptr, &error));
1503 #else
1504   const auto buf = make_unique_ptr_gfree(g_convert(utf8_string.raw().data(),
1505     utf8_string.raw().size(), "WCHAR_T", "UTF-8", nullptr, nullptr, &error));
1506 #endif // !(__STDC_ISO_10646__ || G_OS_WIN32)
1507 
1508   if (error)
1509   {
1510     Glib::Error::throw_exception(error);
1511   }
1512 
1513   // This won't work if the string contains NUL characters.  Unfortunately,
1514   // std::wostream::write() ignores format flags, so we cannot use that.
1515   // The only option would be to create a temporary std::wstring.  However,
1516   // even then GCC's libstdc++-v3 prints only the characters up to the first
1517   // NUL.  Given this, there doesn't seem much of a point in allowing NUL in
1518   // formatted output.  The semantics would be unclear anyway: what's the
1519   // screen width of a NUL?
1520   os << reinterpret_cast<wchar_t*>(buf.get());
1521 
1522   return os;
1523 }
1524 
1525 #endif /* GLIBMM_HAVE_WIDE_STREAM */
1526 
1527 } // namespace Glib
1528