1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2015-2020 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #include <codecvt>
26 #include <cstring>		// std::memcpy, std::memcmp
27 #include <bits/stl_algobase.h>	// std::min
28 
29 namespace std _GLIBCXX_VISIBILITY(default)
30 {
31 _GLIBCXX_BEGIN_NAMESPACE_VERSION
32 
33   // The standard doesn't define these operators, which is annoying.
34   static underlying_type<codecvt_mode>::type
to_integer(codecvt_mode m)35   to_integer(codecvt_mode m)
36   { return static_cast<underlying_type<codecvt_mode>::type>(m); }
37 
operator &=(codecvt_mode & m,codecvt_mode n)38   static codecvt_mode& operator&=(codecvt_mode& m, codecvt_mode n)
39   { return m = codecvt_mode(to_integer(m) & to_integer(n)); }
40 
operator |=(codecvt_mode & m,codecvt_mode n)41   static codecvt_mode& operator|=(codecvt_mode& m, codecvt_mode n)
42   { return m = codecvt_mode(to_integer(m) | to_integer(n)); }
43 
operator ~(codecvt_mode m)44   static codecvt_mode operator~(codecvt_mode m)
45   { return codecvt_mode(~to_integer(m)); }
46 
47 namespace
48 {
  // Largest code point that fits in a single UTF-16 code unit.
  const char32_t max_single_utf16_unit = 0xFFFF;

  // Default upper limit (maxcode) used by the conversion functions in
  // this file.
  const char32_t max_code_point = 0x10FFFF;

  // Sentinel values returned by the read_*_code_point functions in place
  // of a code point.
  // The functions below rely on maxcode < incomplete_mb_character
  // (which is enforced by the codecvt_utf* classes on construction).
  const char32_t incomplete_mb_character = char32_t(-2);
  const char32_t invalid_mb_sequence = char32_t(-1);
58 
59   // Utility type for reading and writing code units of type Elem from
60   // a range defined by a pair of pointers.
61   template<typename Elem, bool Aligned = true>
62     struct range
63     {
64       Elem* next;
65       Elem* end;
66 
67       // Write a code unit.
operator =std::__anon73578e200111::range68       range& operator=(Elem e)
69       {
70 	*next++ = e;
71 	return *this;
72       }
73 
74       // Read the next code unit.
operator *std::__anon73578e200111::range75       Elem operator*() const { return *next; }
76 
77       // Read the Nth code unit.
operator []std::__anon73578e200111::range78       Elem operator[](size_t n) const { return next[n]; }
79 
80       // Move to the next code unit.
operator ++std::__anon73578e200111::range81       range& operator++()
82       {
83 	++next;
84 	return *this;
85       }
86 
87       // Move to the Nth code unit.
operator +=std::__anon73578e200111::range88       range& operator+=(size_t n)
89       {
90 	next += n;
91 	return *this;
92       }
93 
94       // The number of code units remaining.
sizestd::__anon73578e200111::range95       size_t size() const { return end - next; }
96 
97       // The number of bytes remaining.
nbytesstd::__anon73578e200111::range98       size_t nbytes() const { return (const char*)end - (const char*)next; }
99     };
100 
101   // This specialization is used when accessing char16_t values through
102   // pointers to char, which might not be correctly aligned for char16_t.
103   template<typename Elem>
104     struct range<Elem, false>
105     {
106       using value_type = typename remove_const<Elem>::type;
107 
108       using char_pointer = typename
109 	conditional<is_const<Elem>::value, const char*, char*>::type;
110 
111       char_pointer next;
112       char_pointer end;
113 
114       // Write a code unit.
operator =std::__anon73578e200111::range115       range& operator=(Elem e)
116       {
117 	memcpy(next, &e, sizeof(Elem));
118 	++*this;
119 	return *this;
120       }
121 
122       // Read the next code unit.
operator *std::__anon73578e200111::range123       Elem operator*() const
124       {
125 	value_type e;
126 	memcpy(&e, next, sizeof(Elem));
127 	return e;
128       }
129 
130       // Read the Nth code unit.
operator []std::__anon73578e200111::range131       Elem operator[](size_t n) const
132       {
133 	value_type e;
134 	memcpy(&e, next + n * sizeof(Elem), sizeof(Elem));
135 	return e;
136       }
137 
138       // Move to the next code unit.
operator ++std::__anon73578e200111::range139       range& operator++()
140       {
141 	next += sizeof(Elem);
142 	return *this;
143       }
144 
145       // Move to the Nth code unit.
operator +=std::__anon73578e200111::range146       range& operator+=(size_t n)
147       {
148 	next += n * sizeof(Elem);
149 	return *this;
150       }
151 
152       // The number of code units remaining.
sizestd::__anon73578e200111::range153       size_t size() const { return nbytes() / sizeof(Elem); }
154 
155       // The number of bytes remaining.
nbytesstd::__anon73578e200111::range156       size_t nbytes() const { return end - next; }
157     };
158 
  // Multibyte sequences can have a "header" consisting of a Byte Order
  // Mark.  These byte sequences are what read_bom compares against and
  // what write_bom copies to the output.
  const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
  // Finding this BOM makes read_utf16_bom clear little_endian in the mode.
  const unsigned char utf16_bom[2] = { 0xFE, 0xFF };
  // Finding this BOM makes read_utf16_bom set little_endian in the mode.
  const unsigned char utf16le_bom[2] = { 0xFF, 0xFE };
163 
164   // Write a BOM (space permitting).
165   template<typename C, bool A, size_t N>
166     bool
write_bom(range<C,A> & to,const unsigned char (& bom)[N])167     write_bom(range<C, A>& to, const unsigned char (&bom)[N])
168     {
169       static_assert( (N / sizeof(C)) != 0, "" );
170       static_assert( (N % sizeof(C)) == 0, "" );
171 
172       if (to.nbytes() < N)
173 	return false;
174       memcpy(to.next, bom, N);
175       to += (N / sizeof(C));
176       return true;
177     }
178 
179   // Try to read a BOM.
180   template<typename C, bool A, size_t N>
181     bool
read_bom(range<C,A> & from,const unsigned char (& bom)[N])182     read_bom(range<C, A>& from, const unsigned char (&bom)[N])
183     {
184       static_assert( (N / sizeof(C)) != 0, "" );
185       static_assert( (N % sizeof(C)) == 0, "" );
186 
187       if (from.nbytes() >= N && !memcmp(from.next, bom, N))
188 	{
189 	  from += (N / sizeof(C));
190 	  return true;
191 	}
192       return false;
193     }
194 
195   // If generate_header is set in mode write out UTF-8 BOM.
196   template<typename C>
197   bool
write_utf8_bom(range<C> & to,codecvt_mode mode)198   write_utf8_bom(range<C>& to, codecvt_mode mode)
199   {
200     if (mode & generate_header)
201       return write_bom(to, utf8_bom);
202     return true;
203   }
204 
205   // If generate_header is set in mode write out the UTF-16 BOM indicated
206   // by whether little_endian is set in mode.
207   template<bool Aligned>
208   bool
write_utf16_bom(range<char16_t,Aligned> & to,codecvt_mode mode)209   write_utf16_bom(range<char16_t, Aligned>& to, codecvt_mode mode)
210   {
211     if (mode & generate_header)
212     {
213       if (mode & little_endian)
214 	return write_bom(to, utf16le_bom);
215       else
216 	return write_bom(to, utf16_bom);
217     }
218     return true;
219   }
220 
221   // If consume_header is set in mode update from.next to after any BOM.
222   template<typename C>
223   void
read_utf8_bom(range<const C> & from,codecvt_mode mode)224   read_utf8_bom(range<const C>& from, codecvt_mode mode)
225   {
226     if (mode & consume_header)
227       read_bom(from, utf8_bom);
228   }
229 
230   // If consume_header is not set in mode, no effects.
231   // Otherwise, if *from.next is a UTF-16 BOM increment from.next and then:
232   // - if the UTF-16BE BOM was found unset little_endian in mode, or
233   // - if the UTF-16LE BOM was found set little_endian in mode.
234   template<bool Aligned>
235   void
read_utf16_bom(range<const char16_t,Aligned> & from,codecvt_mode & mode)236   read_utf16_bom(range<const char16_t, Aligned>& from, codecvt_mode& mode)
237   {
238     if (mode & consume_header)
239       {
240 	if (read_bom(from, utf16_bom))
241 	  mode &= ~little_endian;
242 	else if (read_bom(from, utf16le_bom))
243 	  mode |= little_endian;
244       }
245   }
246 
247   // Read a codepoint from a UTF-8 multibyte sequence.
248   // Updates from.next if the codepoint is not greater than maxcode.
249   // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
250   template<typename C>
251   char32_t
read_utf8_code_point(range<const C> & from,unsigned long maxcode)252   read_utf8_code_point(range<const C>& from, unsigned long maxcode)
253   {
254     const size_t avail = from.size();
255     if (avail == 0)
256       return incomplete_mb_character;
257     unsigned char c1 = from[0];
258     // https://en.wikipedia.org/wiki/UTF-8#Sample_code
259     if (c1 < 0x80)
260     {
261       ++from;
262       return c1;
263     }
264     else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
265       return invalid_mb_sequence;
266     else if (c1 < 0xE0) // 2-byte sequence
267     {
268       if (avail < 2)
269 	return incomplete_mb_character;
270       unsigned char c2 = from[1];
271       if ((c2 & 0xC0) != 0x80)
272 	return invalid_mb_sequence;
273       char32_t c = (c1 << 6) + c2 - 0x3080;
274       if (c <= maxcode)
275 	from += 2;
276       return c;
277     }
278     else if (c1 < 0xF0) // 3-byte sequence
279     {
280       if (avail < 3)
281 	return incomplete_mb_character;
282       unsigned char c2 = from[1];
283       if ((c2 & 0xC0) != 0x80)
284 	return invalid_mb_sequence;
285       if (c1 == 0xE0 && c2 < 0xA0) // overlong
286 	return invalid_mb_sequence;
287       unsigned char c3 = from[2];
288       if ((c3 & 0xC0) != 0x80)
289 	return invalid_mb_sequence;
290       char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
291       if (c <= maxcode)
292 	from += 3;
293       return c;
294     }
295     else if (c1 < 0xF5) // 4-byte sequence
296     {
297       if (avail < 4)
298 	return incomplete_mb_character;
299       unsigned char c2 = from[1];
300       if ((c2 & 0xC0) != 0x80)
301 	return invalid_mb_sequence;
302       if (c1 == 0xF0 && c2 < 0x90) // overlong
303 	return invalid_mb_sequence;
304       if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
305       return invalid_mb_sequence;
306       unsigned char c3 = from[2];
307       if ((c3 & 0xC0) != 0x80)
308 	return invalid_mb_sequence;
309       unsigned char c4 = from[3];
310       if ((c4 & 0xC0) != 0x80)
311 	return invalid_mb_sequence;
312       char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
313       if (c <= maxcode)
314 	from += 4;
315       return c;
316     }
317     else // > U+10FFFF
318       return invalid_mb_sequence;
319   }
320 
321   template<typename C>
322   bool
write_utf8_code_point(range<C> & to,char32_t code_point)323   write_utf8_code_point(range<C>& to, char32_t code_point)
324   {
325     if (code_point < 0x80)
326       {
327 	if (to.size() < 1)
328 	  return false;
329 	to = code_point;
330       }
331     else if (code_point <= 0x7FF)
332       {
333 	if (to.size() < 2)
334 	  return false;
335 	to = (code_point >> 6) + 0xC0;
336 	to = (code_point & 0x3F) + 0x80;
337       }
338     else if (code_point <= 0xFFFF)
339       {
340 	if (to.size() < 3)
341 	  return false;
342 	to = (code_point >> 12) + 0xE0;
343 	to = ((code_point >> 6) & 0x3F) + 0x80;
344 	to = (code_point & 0x3F) + 0x80;
345       }
346     else if (code_point <= 0x10FFFF)
347       {
348 	if (to.size() < 4)
349 	  return false;
350 	to = (code_point >> 18) + 0xF0;
351 	to = ((code_point >> 12) & 0x3F) + 0x80;
352 	to = ((code_point >> 6) & 0x3F) + 0x80;
353 	to = (code_point & 0x3F) + 0x80;
354       }
355     else
356       return false;
357     return true;
358   }
359 
360   inline char16_t
adjust_byte_order(char16_t c,codecvt_mode mode)361   adjust_byte_order(char16_t c, codecvt_mode mode)
362   {
363 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
364     return (mode & little_endian) ? __builtin_bswap16(c) : c;
365 #else
366     return (mode & little_endian) ? c : __builtin_bswap16(c);
367 #endif
368   }
369 
370   // Return true if c is a high-surrogate (aka leading) code point.
371   inline bool
is_high_surrogate(char32_t c)372   is_high_surrogate(char32_t c)
373   {
374     return c >= 0xD800 && c <= 0xDBFF;
375   }
376 
377   // Return true if c is a low-surrogate (aka trailing) code point.
378   inline bool
is_low_surrogate(char32_t c)379   is_low_surrogate(char32_t c)
380   {
381     return c >= 0xDC00 && c <= 0xDFFF;
382   }
383 
384   inline char32_t
surrogate_pair_to_code_point(char32_t high,char32_t low)385   surrogate_pair_to_code_point(char32_t high, char32_t low)
386   {
387     return (high << 10) + low - 0x35FDC00;
388   }
389 
390   // Read a codepoint from a UTF-16 multibyte sequence.
391   // The sequence's endianness is indicated by (mode & little_endian).
392   // Updates from.next if the codepoint is not greater than maxcode.
393   // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
394   template<bool Aligned>
395     char32_t
read_utf16_code_point(range<const char16_t,Aligned> & from,unsigned long maxcode,codecvt_mode mode)396     read_utf16_code_point(range<const char16_t, Aligned>& from,
397 			  unsigned long maxcode, codecvt_mode mode)
398     {
399       const size_t avail = from.size();
400       if (avail == 0)
401 	return incomplete_mb_character;
402       int inc = 1;
403       char32_t c = adjust_byte_order(from[0], mode);
404       if (is_high_surrogate(c))
405 	{
406 	  if (avail < 2)
407 	    return incomplete_mb_character;
408 	  const char16_t c2 = adjust_byte_order(from[1], mode);
409 	  if (is_low_surrogate(c2))
410 	    {
411 	      c = surrogate_pair_to_code_point(c, c2);
412 	      inc = 2;
413 	    }
414 	  else
415 	    return invalid_mb_sequence;
416 	}
417       else if (is_low_surrogate(c))
418 	return invalid_mb_sequence;
419       if (c <= maxcode)
420 	from += inc;
421       return c;
422     }
423 
424   template<typename C, bool A>
425   bool
write_utf16_code_point(range<C,A> & to,char32_t codepoint,codecvt_mode mode)426   write_utf16_code_point(range<C, A>& to, char32_t codepoint, codecvt_mode mode)
427   {
428     static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
429 
430     if (codepoint <= max_single_utf16_unit)
431       {
432 	if (to.size() > 0)
433 	  {
434 	    to = adjust_byte_order(codepoint, mode);
435 	    return true;
436 	  }
437       }
438     else if (to.size() > 1)
439       {
440 	// Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
441 	const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
442 	char16_t lead = LEAD_OFFSET + (codepoint >> 10);
443 	char16_t trail = 0xDC00 + (codepoint & 0x3FF);
444 	to = adjust_byte_order(lead, mode);
445 	to = adjust_byte_order(trail, mode);
446 	return true;
447       }
448     return false;
449   }
450 
451   // utf8 -> ucs4
452   template<typename C>
453   codecvt_base::result
ucs4_in(range<const C> & from,range<char32_t> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={})454   ucs4_in(range<const C>& from, range<char32_t>& to,
455           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
456   {
457     read_utf8_bom(from, mode);
458     while (from.size() && to.size())
459       {
460 	const char32_t codepoint = read_utf8_code_point(from, maxcode);
461 	if (codepoint == incomplete_mb_character)
462 	  return codecvt_base::partial;
463 	if (codepoint > maxcode)
464 	  return codecvt_base::error;
465 	to = codepoint;
466       }
467     return from.size() ? codecvt_base::partial : codecvt_base::ok;
468   }
469 
470   // ucs4 -> utf8
471   template<typename C>
472   codecvt_base::result
ucs4_out(range<const char32_t> & from,range<C> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={})473   ucs4_out(range<const char32_t>& from, range<C>& to,
474            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
475   {
476     if (!write_utf8_bom(to, mode))
477       return codecvt_base::partial;
478     while (from.size())
479       {
480 	const char32_t c = from[0];
481 	if (c > maxcode)
482 	  return codecvt_base::error;
483 	if (!write_utf8_code_point(to, c))
484 	  return codecvt_base::partial;
485 	++from;
486       }
487     return codecvt_base::ok;
488   }
489 
490   // utf16 -> ucs4
491   codecvt_base::result
ucs4_in(range<const char16_t,false> & from,range<char32_t> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={})492   ucs4_in(range<const char16_t, false>& from, range<char32_t>& to,
493           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
494   {
495     read_utf16_bom(from, mode);
496     while (from.size() && to.size())
497       {
498 	const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
499 	if (codepoint == incomplete_mb_character)
500 	  return codecvt_base::partial;
501 	if (codepoint > maxcode)
502 	  return codecvt_base::error;
503 	to = codepoint;
504       }
505     return from.size() ? codecvt_base::partial : codecvt_base::ok;
506   }
507 
508   // ucs4 -> utf16
509   codecvt_base::result
ucs4_out(range<const char32_t> & from,range<char16_t,false> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={})510   ucs4_out(range<const char32_t>& from, range<char16_t, false>& to,
511            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
512   {
513     if (!write_utf16_bom(to, mode))
514       return codecvt_base::partial;
515     while (from.size())
516       {
517 	const char32_t c = from[0];
518 	if (c > maxcode)
519 	  return codecvt_base::error;
520 	if (!write_utf16_code_point(to, c, mode))
521 	  return codecvt_base::partial;
522 	++from;
523       }
524     return codecvt_base::ok;
525   }
526 
  // Flag indicating whether to process UTF-16 or UCS2: with `allowed`
  // surrogates are accepted (UTF-16), with `disallowed` they are treated
  // as errors (UCS-2).
  enum class surrogates { allowed, disallowed };
529 
530   // utf8 -> utf16 (or utf8 -> ucs2 if s == surrogates::disallowed)
531   template<typename C8, typename C16>
532   codecvt_base::result
utf16_in(range<const C8> & from,range<C16> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={},surrogates s=surrogates::allowed)533   utf16_in(range<const C8>& from, range<C16>& to,
534 	   unsigned long maxcode = max_code_point, codecvt_mode mode = {},
535 	   surrogates s = surrogates::allowed)
536   {
537     read_utf8_bom(from, mode);
538     while (from.size() && to.size())
539       {
540 	auto orig = from;
541 	const char32_t codepoint = read_utf8_code_point(from, maxcode);
542 	if (codepoint == incomplete_mb_character)
543 	  {
544 	    if (s == surrogates::allowed)
545 	      return codecvt_base::partial;
546 	    else
547 	      return codecvt_base::error; // No surrogates in UCS2
548 	  }
549 	if (codepoint > maxcode)
550 	  return codecvt_base::error;
551 	if (!write_utf16_code_point(to, codepoint, mode))
552 	  {
553 	    from = orig; // rewind to previous position
554 	    return codecvt_base::partial;
555 	  }
556       }
557     return codecvt_base::ok;
558   }
559 
560   // utf16 -> utf8 (or ucs2 -> utf8 if s == surrogates::disallowed)
561   template<typename C16, typename C8>
562   codecvt_base::result
utf16_out(range<const C16> & from,range<C8> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={},surrogates s=surrogates::allowed)563   utf16_out(range<const C16>& from, range<C8>& to,
564 	    unsigned long maxcode = max_code_point, codecvt_mode mode = {},
565 	    surrogates s = surrogates::allowed)
566   {
567     if (!write_utf8_bom(to, mode))
568       return codecvt_base::partial;
569     while (from.size())
570       {
571 	char32_t c = from[0];
572 	int inc = 1;
573 	if (is_high_surrogate(c))
574 	  {
575 	    if (s == surrogates::disallowed)
576 	      return codecvt_base::error; // No surrogates in UCS-2
577 
578 	    if (from.size() < 2)
579 	      return codecvt_base::ok; // stop converting at this point
580 
581 	    const char32_t c2 = from[1];
582 	    if (is_low_surrogate(c2))
583 	      {
584 		c = surrogate_pair_to_code_point(c, c2);
585 		inc = 2;
586 	      }
587 	    else
588 	      return codecvt_base::error;
589 	  }
590 	else if (is_low_surrogate(c))
591 	  return codecvt_base::error;
592 	if (c > maxcode)
593 	  return codecvt_base::error;
594 	if (!write_utf8_code_point(to, c))
595 	  return codecvt_base::partial;
596 	from += inc;
597       }
598     return codecvt_base::ok;
599   }
600 
601   // return pos such that [begin,pos) is valid UTF-16 string no longer than max
602   template<typename C>
603   const C*
utf16_span(const C * begin,const C * end,size_t max,char32_t maxcode=max_code_point,codecvt_mode mode={})604   utf16_span(const C* begin, const C* end, size_t max,
605 	     char32_t maxcode = max_code_point, codecvt_mode mode = {})
606   {
607     range<const C> from{ begin, end };
608     read_utf8_bom(from, mode);
609     size_t count = 0;
610     while (count+1 < max)
611       {
612 	char32_t c = read_utf8_code_point(from, maxcode);
613 	if (c > maxcode)
614 	  return from.next;
615 	else if (c > max_single_utf16_unit)
616 	  ++count;
617 	++count;
618       }
619     if (count+1 == max) // take one more character if it fits in a single unit
620       read_utf8_code_point(from, std::min(max_single_utf16_unit, maxcode));
621     return from.next;
622   }
623 
624   // utf8 -> ucs2
625   template<typename C>
626   codecvt_base::result
ucs2_in(range<const C> & from,range<char16_t> & to,char32_t maxcode=max_code_point,codecvt_mode mode={})627   ucs2_in(range<const C>& from, range<char16_t>& to,
628 	  char32_t maxcode = max_code_point, codecvt_mode mode = {})
629   {
630     // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
631     maxcode = std::min(max_single_utf16_unit, maxcode);
632     return utf16_in(from, to, maxcode, mode, surrogates::disallowed);
633   }
634 
635   // ucs2 -> utf8
636   template<typename C>
637   codecvt_base::result
ucs2_out(range<const char16_t> & from,range<C> & to,char32_t maxcode=max_code_point,codecvt_mode mode={})638   ucs2_out(range<const char16_t>& from, range<C>& to,
639 	   char32_t maxcode = max_code_point, codecvt_mode mode = {})
640   {
641     // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
642     maxcode = std::min(max_single_utf16_unit, maxcode);
643     return utf16_out(from, to, maxcode, mode, surrogates::disallowed);
644   }
645 
646   // ucs2 -> utf16
647   codecvt_base::result
ucs2_out(range<const char16_t> & from,range<char16_t,false> & to,char32_t maxcode=max_code_point,codecvt_mode mode={})648   ucs2_out(range<const char16_t>& from, range<char16_t, false>& to,
649 	   char32_t maxcode = max_code_point, codecvt_mode mode = {})
650   {
651     if (!write_utf16_bom(to, mode))
652       return codecvt_base::partial;
653     while (from.size() && to.size())
654       {
655 	char16_t c = from[0];
656 	if (is_high_surrogate(c))
657 	  return codecvt_base::error;
658 	if (c > maxcode)
659 	  return codecvt_base::error;
660 	to = adjust_byte_order(c, mode);
661 	++from;
662       }
663     return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
664   }
665 
666   // utf16 -> ucs2
667   codecvt_base::result
ucs2_in(range<const char16_t,false> & from,range<char16_t> & to,char32_t maxcode=max_code_point,codecvt_mode mode={})668   ucs2_in(range<const char16_t, false>& from, range<char16_t>& to,
669 	  char32_t maxcode = max_code_point, codecvt_mode mode = {})
670   {
671     read_utf16_bom(from, mode);
672     // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
673     maxcode = std::min(max_single_utf16_unit, maxcode);
674     while (from.size() && to.size())
675       {
676 	const char32_t c = read_utf16_code_point(from, maxcode, mode);
677 	if (c == incomplete_mb_character)
678 	  return codecvt_base::error; // UCS-2 only supports single units.
679 	if (c > maxcode)
680 	  return codecvt_base::error;
681 	to = c;
682       }
683     return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
684   }
685 
686   const char16_t*
ucs2_span(range<const char16_t,false> & from,size_t max,char32_t maxcode,codecvt_mode mode)687   ucs2_span(range<const char16_t, false>& from, size_t max,
688             char32_t maxcode, codecvt_mode mode)
689   {
690     read_utf16_bom(from, mode);
691     // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
692     maxcode = std::min(max_single_utf16_unit, maxcode);
693     char32_t c = 0;
694     while (max-- && c <= maxcode)
695       c = read_utf16_code_point(from, maxcode, mode);
696     return reinterpret_cast<const char16_t*>(from.next);
697   }
698 
699   template<typename C>
700   const C*
ucs2_span(const C * begin,const C * end,size_t max,char32_t maxcode,codecvt_mode mode)701   ucs2_span(const C* begin, const C* end, size_t max,
702             char32_t maxcode, codecvt_mode mode)
703   {
704     range<const C> from{ begin, end };
705     read_utf8_bom(from, mode);
706     // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
707     maxcode = std::min(max_single_utf16_unit, maxcode);
708     char32_t c = 0;
709     while (max-- && c <= maxcode)
710       c = read_utf8_code_point(from, maxcode);
711     return from.next;
712   }
713 
714   // return pos such that [begin,pos) is valid UCS-4 string no longer than max
715   template<typename C>
716   const C*
ucs4_span(const C * begin,const C * end,size_t max,char32_t maxcode=max_code_point,codecvt_mode mode={})717   ucs4_span(const C* begin, const C* end, size_t max,
718             char32_t maxcode = max_code_point, codecvt_mode mode = {})
719   {
720     range<const C> from{ begin, end };
721     read_utf8_bom(from, mode);
722     char32_t c = 0;
723     while (max-- && c <= maxcode)
724       c = read_utf8_code_point(from, maxcode);
725     return from.next;
726   }
727 
728   // return pos such that [begin,pos) is valid UCS-4 string no longer than max
729   const char16_t*
ucs4_span(range<const char16_t,false> & from,size_t max,char32_t maxcode=max_code_point,codecvt_mode mode={})730   ucs4_span(range<const char16_t, false>& from, size_t max,
731             char32_t maxcode = max_code_point, codecvt_mode mode = {})
732   {
733     read_utf16_bom(from, mode);
734     char32_t c = 0;
735     while (max-- && c <= maxcode)
736       c = read_utf16_code_point(from, maxcode, mode);
737     return reinterpret_cast<const char16_t*>(from.next);
738   }
739 }
740 
// Define members of codecvt<char16_t, char, mbstate_t> specialization.
// Converts from UTF-8 to UTF-16.

// Definition of the facet's static locale::id member.
locale::id codecvt<char16_t, char, mbstate_t>::id;

// Out-of-line destructor definition; the body is empty.
codecvt<char16_t, char, mbstate_t>::~codecvt() { }
747 
748 codecvt_base::result
749 codecvt<char16_t, char, mbstate_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const750 do_out(state_type&,
751        const intern_type* __from,
752        const intern_type* __from_end, const intern_type*& __from_next,
753        extern_type* __to, extern_type* __to_end,
754        extern_type*& __to_next) const
755 {
756   range<const char16_t> from{ __from, __from_end };
757   range<char> to{ __to, __to_end };
758   auto res = utf16_out(from, to);
759   __from_next = from.next;
760   __to_next = to.next;
761   return res;
762 }
763 
764 codecvt_base::result
765 codecvt<char16_t, char, mbstate_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const766 do_unshift(state_type&, extern_type* __to, extern_type*,
767 	   extern_type*& __to_next) const
768 {
769   __to_next = __to;
770   return noconv; // we don't use mbstate_t for the unicode facets
771 }
772 
773 codecvt_base::result
774 codecvt<char16_t, char, mbstate_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const775 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
776       const extern_type*& __from_next,
777       intern_type* __to, intern_type* __to_end,
778       intern_type*& __to_next) const
779 {
780   range<const char> from{ __from, __from_end };
781   range<char16_t> to{ __to, __to_end };
782 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
783   codecvt_mode mode = {};
784 #else
785   codecvt_mode mode = little_endian;
786 #endif
787   auto res = utf16_in(from, to, max_code_point, mode);
788   __from_next = from.next;
789   __to_next = to.next;
790   return res;
791 }
792 
793 int
do_encoding() const794 codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
795 { return 0; } // UTF-8 is not a fixed-width encoding
796 
797 bool
do_always_noconv() const798 codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
799 { return false; }
800 
801 int
802 codecvt<char16_t, char, mbstate_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const803 do_length(state_type&, const extern_type* __from,
804 	  const extern_type* __end, size_t __max) const
805 {
806   __end = utf16_span(__from, __end, __max);
807   return __end - __from;
808 }
809 
810 int
do_max_length() const811 codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
812 {
813   // A single character (one or two UTF-16 code units) requires
814   // up to four UTF-8 code units.
815   return 4;
816 }
817 
// Define members of codecvt<char32_t, char, mbstate_t> specialization.
// Converts from UTF-8 to UTF-32 (aka UCS-4).

// Definition of the facet's static locale::id member.
locale::id codecvt<char32_t, char, mbstate_t>::id;

// Out-of-line destructor definition; the body is empty.
codecvt<char32_t, char, mbstate_t>::~codecvt() { }
824 
825 codecvt_base::result
826 codecvt<char32_t, char, mbstate_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const827 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
828        const intern_type*& __from_next,
829        extern_type* __to, extern_type* __to_end,
830        extern_type*& __to_next) const
831 {
832   range<const char32_t> from{ __from, __from_end };
833   range<char> to{ __to, __to_end };
834   auto res = ucs4_out(from, to);
835   __from_next = from.next;
836   __to_next = to.next;
837   return res;
838 }
839 
840 codecvt_base::result
841 codecvt<char32_t, char, mbstate_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const842 do_unshift(state_type&, extern_type* __to, extern_type*,
843 	   extern_type*& __to_next) const
844 {
845   __to_next = __to;
846   return noconv;
847 }
848 
849 codecvt_base::result
850 codecvt<char32_t, char, mbstate_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const851 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
852       const extern_type*& __from_next,
853       intern_type* __to, intern_type* __to_end,
854       intern_type*& __to_next) const
855 {
856   range<const char> from{ __from, __from_end };
857   range<char32_t> to{ __to, __to_end };
858   auto res = ucs4_in(from, to);
859   __from_next = from.next;
860   __to_next = to.next;
861   return res;
862 }
863 
864 int
do_encoding() const865 codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
866 { return 0; } // UTF-8 is not a fixed-width encoding
867 
868 bool
do_always_noconv() const869 codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
870 { return false; }
871 
872 int
873 codecvt<char32_t, char, mbstate_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const874 do_length(state_type&, const extern_type* __from,
875 	  const extern_type* __end, size_t __max) const
876 {
877   __end = ucs4_span(__from, __end, __max);
878   return __end - __from;
879 }
880 
881 int
do_max_length() const882 codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
883 {
884   // A single character (one UTF-32 code unit) requires
885   // up to 4 UTF-8 code units.
886   return 4;
887 }
888 
889 #if defined(_GLIBCXX_USE_CHAR8_T)
890 // Define members of codecvt<char16_t, char8_t, mbstate_t> specialization.
891 // Converts from UTF-8 to UTF-16.
892 
893 locale::id codecvt<char16_t, char8_t, mbstate_t>::id;
894 
~codecvt()895 codecvt<char16_t, char8_t, mbstate_t>::~codecvt() { }
896 
897 codecvt_base::result
898 codecvt<char16_t, char8_t, mbstate_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const899 do_out(state_type&,
900        const intern_type* __from,
901        const intern_type* __from_end, const intern_type*& __from_next,
902        extern_type* __to, extern_type* __to_end,
903        extern_type*& __to_next) const
904 {
905   range<const char16_t> from{ __from, __from_end };
906   range<char8_t> to{ __to, __to_end };
907   auto res = utf16_out(from, to);
908   __from_next = from.next;
909   __to_next = to.next;
910   return res;
911 }
912 
913 codecvt_base::result
914 codecvt<char16_t, char8_t, mbstate_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const915 do_unshift(state_type&, extern_type* __to, extern_type*,
916 	   extern_type*& __to_next) const
917 {
918   __to_next = __to;
919   return noconv; // we don't use mbstate_t for the unicode facets
920 }
921 
922 codecvt_base::result
923 codecvt<char16_t, char8_t, mbstate_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const924 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
925       const extern_type*& __from_next,
926       intern_type* __to, intern_type* __to_end,
927       intern_type*& __to_next) const
928 {
929   range<const char8_t> from{ __from, __from_end };
930   range<char16_t> to{ __to, __to_end };
931 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
932   codecvt_mode mode = {};
933 #else
934   codecvt_mode mode = little_endian;
935 #endif
936   auto res = utf16_in(from, to, max_code_point, mode);
937   __from_next = from.next;
938   __to_next = to.next;
939   return res;
940 }
941 
942 int
do_encoding() const943 codecvt<char16_t, char8_t, mbstate_t>::do_encoding() const throw()
944 { return 0; } // UTF-8 is not a fixed-width encoding
945 
946 bool
do_always_noconv() const947 codecvt<char16_t, char8_t, mbstate_t>::do_always_noconv() const throw()
948 { return false; }
949 
950 int
951 codecvt<char16_t, char8_t, mbstate_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const952 do_length(state_type&, const extern_type* __from,
953 	  const extern_type* __end, size_t __max) const
954 {
955   __end = utf16_span(__from, __end, __max);
956   return __end - __from;
957 }
958 
959 int
do_max_length() const960 codecvt<char16_t, char8_t, mbstate_t>::do_max_length() const throw()
961 {
962   // A single character (one or two UTF-16 code units) requires
963   // up to four UTF-8 code units.
964   return 4;
965 }
966 
967 // Define members of codecvt<char32_t, char8_t, mbstate_t> specialization.
968 // Converts from UTF-8 to UTF-32 (aka UCS-4).
969 
970 locale::id codecvt<char32_t, char8_t, mbstate_t>::id;
971 
~codecvt()972 codecvt<char32_t, char8_t, mbstate_t>::~codecvt() { }
973 
974 codecvt_base::result
975 codecvt<char32_t, char8_t, mbstate_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const976 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
977        const intern_type*& __from_next,
978        extern_type* __to, extern_type* __to_end,
979        extern_type*& __to_next) const
980 {
981   range<const char32_t> from{ __from, __from_end };
982   range<char8_t> to{ __to, __to_end };
983   auto res = ucs4_out(from, to);
984   __from_next = from.next;
985   __to_next = to.next;
986   return res;
987 }
988 
989 codecvt_base::result
990 codecvt<char32_t, char8_t, mbstate_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const991 do_unshift(state_type&, extern_type* __to, extern_type*,
992 	   extern_type*& __to_next) const
993 {
994   __to_next = __to;
995   return noconv;
996 }
997 
998 codecvt_base::result
999 codecvt<char32_t, char8_t, mbstate_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1000 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1001       const extern_type*& __from_next,
1002       intern_type* __to, intern_type* __to_end,
1003       intern_type*& __to_next) const
1004 {
1005   range<const char8_t> from{ __from, __from_end };
1006   range<char32_t> to{ __to, __to_end };
1007   auto res = ucs4_in(from, to);
1008   __from_next = from.next;
1009   __to_next = to.next;
1010   return res;
1011 }
1012 
1013 int
do_encoding() const1014 codecvt<char32_t, char8_t, mbstate_t>::do_encoding() const throw()
1015 { return 0; } // UTF-8 is not a fixed-width encoding
1016 
1017 bool
do_always_noconv() const1018 codecvt<char32_t, char8_t, mbstate_t>::do_always_noconv() const throw()
1019 { return false; }
1020 
1021 int
1022 codecvt<char32_t, char8_t, mbstate_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1023 do_length(state_type&, const extern_type* __from,
1024 	  const extern_type* __end, size_t __max) const
1025 {
1026   __end = ucs4_span(__from, __end, __max);
1027   return __end - __from;
1028 }
1029 
1030 int
do_max_length() const1031 codecvt<char32_t, char8_t, mbstate_t>::do_max_length() const throw()
1032 {
1033   // A single character (one UTF-32 code unit) requires
1034   // up to 4 UTF-8 code units.
1035   return 4;
1036 }
1037 #endif // _GLIBCXX_USE_CHAR8_T
1038 
1039 // Define members of codecvt_utf8<char16_t> base class implementation.
1040 // Converts from UTF-8 to UCS-2.
1041 
~__codecvt_utf8_base()1042 __codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
1043 
1044 codecvt_base::result
1045 __codecvt_utf8_base<char16_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1046 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1047        const intern_type*& __from_next,
1048        extern_type* __to, extern_type* __to_end,
1049        extern_type*& __to_next) const
1050 {
1051   range<const char16_t> from{ __from, __from_end };
1052   range<char> to{ __to, __to_end };
1053   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1054   __from_next = from.next;
1055   __to_next = to.next;
1056   return res;
1057 }
1058 
1059 codecvt_base::result
1060 __codecvt_utf8_base<char16_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1061 do_unshift(state_type&, extern_type* __to, extern_type*,
1062 	   extern_type*& __to_next) const
1063 {
1064   __to_next = __to;
1065   return noconv;
1066 }
1067 
1068 codecvt_base::result
1069 __codecvt_utf8_base<char16_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1070 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1071       const extern_type*& __from_next,
1072       intern_type* __to, intern_type* __to_end,
1073       intern_type*& __to_next) const
1074 {
1075   range<const char> from{ __from, __from_end };
1076   range<char16_t> to{ __to, __to_end };
1077   codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1078 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1079   mode = codecvt_mode(mode | little_endian);
1080 #endif
1081   auto res = ucs2_in(from, to, _M_maxcode, mode);
1082   __from_next = from.next;
1083   __to_next = to.next;
1084   return res;
1085 }
1086 
1087 int
do_encoding() const1088 __codecvt_utf8_base<char16_t>::do_encoding() const throw()
1089 { return 0; } // UTF-8 is not a fixed-width encoding
1090 
1091 bool
do_always_noconv() const1092 __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
1093 { return false; }
1094 
1095 int
1096 __codecvt_utf8_base<char16_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1097 do_length(state_type&, const extern_type* __from,
1098 	  const extern_type* __end, size_t __max) const
1099 {
1100   __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
1101   return __end - __from;
1102 }
1103 
1104 int
do_max_length() const1105 __codecvt_utf8_base<char16_t>::do_max_length() const throw()
1106 {
1107   // A single UCS-2 character requires up to three UTF-8 code units.
1108   // (UCS-2 cannot represent characters that use four UTF-8 code units).
1109   int max = 3;
1110   if (_M_mode & consume_header)
1111     max += sizeof(utf8_bom);
1112   return max;
1113 }
1114 
1115 // Define members of codecvt_utf8<char32_t> base class implementation.
1116 // Converts from UTF-8 to UTF-32 (aka UCS-4).
1117 
~__codecvt_utf8_base()1118 __codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
1119 
1120 codecvt_base::result
1121 __codecvt_utf8_base<char32_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1122 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1123        const intern_type*& __from_next,
1124        extern_type* __to, extern_type* __to_end,
1125        extern_type*& __to_next) const
1126 {
1127   range<const char32_t> from{ __from, __from_end };
1128   range<char> to{ __to, __to_end };
1129   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1130   __from_next = from.next;
1131   __to_next = to.next;
1132   return res;
1133 }
1134 
1135 codecvt_base::result
1136 __codecvt_utf8_base<char32_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1137 do_unshift(state_type&, extern_type* __to, extern_type*,
1138 	   extern_type*& __to_next) const
1139 {
1140   __to_next = __to;
1141   return noconv;
1142 }
1143 
1144 codecvt_base::result
1145 __codecvt_utf8_base<char32_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1146 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1147       const extern_type*& __from_next,
1148       intern_type* __to, intern_type* __to_end,
1149       intern_type*& __to_next) const
1150 {
1151   range<const char> from{ __from, __from_end };
1152   range<char32_t> to{ __to, __to_end };
1153   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1154   __from_next = from.next;
1155   __to_next = to.next;
1156   return res;
1157 }
1158 
1159 int
do_encoding() const1160 __codecvt_utf8_base<char32_t>::do_encoding() const throw()
1161 { return 0; } // UTF-8 is not a fixed-width encoding
1162 
1163 bool
do_always_noconv() const1164 __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
1165 { return false; }
1166 
1167 int
1168 __codecvt_utf8_base<char32_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1169 do_length(state_type&, const extern_type* __from,
1170 	  const extern_type* __end, size_t __max) const
1171 {
1172   __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
1173   return __end - __from;
1174 }
1175 
1176 int
do_max_length() const1177 __codecvt_utf8_base<char32_t>::do_max_length() const throw()
1178 {
1179   // A single UCS-4 character requires up to four UTF-8 code units.
1180   int max = 4;
1181   if (_M_mode & consume_header)
1182     max += sizeof(utf8_bom);
1183   return max;
1184 }
1185 
1186 #ifdef _GLIBCXX_USE_WCHAR_T
1187 
// For the two wchar_t widths handled below, check at compile time that
// wchar_t has the same size as the character type it is reinterpreted as.
#if __SIZEOF_WCHAR_T__ == 4
static_assert(sizeof(char32_t) == sizeof(wchar_t), "");
#elif __SIZEOF_WCHAR_T__ == 2
static_assert(sizeof(char16_t) == sizeof(wchar_t), "");
#endif
1193 
1194 // Define members of codecvt_utf8<wchar_t> base class implementation.
1195 // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1196 
~__codecvt_utf8_base()1197 __codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
1198 
1199 codecvt_base::result
1200 __codecvt_utf8_base<wchar_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1201 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1202        const intern_type*& __from_next,
1203        extern_type* __to, extern_type* __to_end,
1204        extern_type*& __to_next) const
1205 {
1206   range<char> to{ __to, __to_end };
1207 #if __SIZEOF_WCHAR_T__ == 2
1208   range<const char16_t> from{
1209     reinterpret_cast<const char16_t*>(__from),
1210     reinterpret_cast<const char16_t*>(__from_end)
1211   };
1212   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1213 #elif __SIZEOF_WCHAR_T__ == 4
1214   range<const char32_t> from{
1215     reinterpret_cast<const char32_t*>(__from),
1216     reinterpret_cast<const char32_t*>(__from_end)
1217   };
1218   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1219 #else
1220   return codecvt_base::error;
1221 #endif
1222   __from_next = reinterpret_cast<const wchar_t*>(from.next);
1223   __to_next = to.next;
1224   return res;
1225 }
1226 
1227 codecvt_base::result
1228 __codecvt_utf8_base<wchar_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1229 do_unshift(state_type&, extern_type* __to, extern_type*,
1230 	   extern_type*& __to_next) const
1231 {
1232   __to_next = __to;
1233   return noconv;
1234 }
1235 
1236 codecvt_base::result
1237 __codecvt_utf8_base<wchar_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1238 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1239       const extern_type*& __from_next,
1240       intern_type* __to, intern_type* __to_end,
1241       intern_type*& __to_next) const
1242 {
1243   range<const char> from{ __from, __from_end };
1244 #if __SIZEOF_WCHAR_T__ == 2
1245   range<char16_t> to{
1246     reinterpret_cast<char16_t*>(__to),
1247     reinterpret_cast<char16_t*>(__to_end)
1248   };
1249 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1250   codecvt_mode mode = {};
1251 #else
1252   codecvt_mode mode = little_endian;
1253 #endif
1254   auto res = ucs2_in(from, to, _M_maxcode, mode);
1255 #elif __SIZEOF_WCHAR_T__ == 4
1256   range<char32_t> to{
1257     reinterpret_cast<char32_t*>(__to),
1258     reinterpret_cast<char32_t*>(__to_end)
1259   };
1260   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1261 #else
1262   return codecvt_base::error;
1263 #endif
1264   __from_next = from.next;
1265   __to_next = reinterpret_cast<wchar_t*>(to.next);
1266   return res;
1267 }
1268 
1269 int
do_encoding() const1270 __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
1271 { return 0; } // UTF-8 is not a fixed-width encoding
1272 
1273 bool
do_always_noconv() const1274 __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
1275 { return false; }
1276 
1277 int
1278 __codecvt_utf8_base<wchar_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1279 do_length(state_type&, const extern_type* __from,
1280 	  const extern_type* __end, size_t __max) const
1281 {
1282 #if __SIZEOF_WCHAR_T__ == 2
1283   __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
1284 #elif __SIZEOF_WCHAR_T__ == 4
1285   __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
1286 #else
1287   __end = __from;
1288 #endif
1289   return __end - __from;
1290 }
1291 
1292 int
do_max_length() const1293 __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
1294 {
1295 #if __SIZEOF_WCHAR_T__ == 2
1296   int max = 3; // See __codecvt_utf8_base<char16_t>::do_max_length()
1297 #else
1298   int max = 4; // See __codecvt_utf8_base<char32_t>::do_max_length()
1299 #endif
1300   if (_M_mode & consume_header)
1301     max += sizeof(utf8_bom);
1302   return max;
1303 }
1304 #endif
1305 
1306 // Define members of codecvt_utf16<char16_t> base class implementation.
1307 // Converts from UTF-16 to UCS-2.
1308 
~__codecvt_utf16_base()1309 __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
1310 
1311 codecvt_base::result
1312 __codecvt_utf16_base<char16_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1313 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1314        const intern_type*& __from_next,
1315        extern_type* __to, extern_type* __to_end,
1316        extern_type*& __to_next) const
1317 {
1318   range<const char16_t> from{ __from, __from_end };
1319   range<char16_t, false> to{ __to, __to_end };
1320   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1321   __from_next = from.next;
1322   __to_next = reinterpret_cast<char*>(to.next);
1323   return res;
1324 }
1325 
1326 codecvt_base::result
1327 __codecvt_utf16_base<char16_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1328 do_unshift(state_type&, extern_type* __to, extern_type*,
1329 	   extern_type*& __to_next) const
1330 {
1331   __to_next = __to;
1332   return noconv;
1333 }
1334 
1335 codecvt_base::result
1336 __codecvt_utf16_base<char16_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1337 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1338       const extern_type*& __from_next,
1339       intern_type* __to, intern_type* __to_end,
1340       intern_type*& __to_next) const
1341 {
1342   range<const char16_t, false> from{ __from, __from_end };
1343   range<char16_t> to{ __to, __to_end };
1344   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1345   __from_next = reinterpret_cast<const char*>(from.next);
1346   __to_next = to.next;
1347   if (res == codecvt_base::ok && __from_next != __from_end)
1348     res = codecvt_base::error;
1349   return res;
1350 }
1351 
1352 int
do_encoding() const1353 __codecvt_utf16_base<char16_t>::do_encoding() const throw()
1354 { return 0; } // UTF-16 is not a fixed-width encoding
1355 
1356 bool
do_always_noconv() const1357 __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
1358 { return false; }
1359 
1360 int
1361 __codecvt_utf16_base<char16_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1362 do_length(state_type&, const extern_type* __from,
1363 	  const extern_type* __end, size_t __max) const
1364 {
1365   range<const char16_t, false> from{ __from, __end };
1366   const char16_t* next = ucs2_span(from, __max, _M_maxcode, _M_mode);
1367   return reinterpret_cast<const char*>(next) - __from;
1368 }
1369 
1370 int
do_max_length() const1371 __codecvt_utf16_base<char16_t>::do_max_length() const throw()
1372 {
1373   // A single UCS-2 character requires one UTF-16 code unit (so two chars).
1374   // (UCS-2 cannot represent characters that use multiple UTF-16 code units).
1375   int max = 2;
1376   if (_M_mode & consume_header)
1377     max += sizeof(utf16_bom);
1378   return max;
1379 }
1380 
1381 // Define members of codecvt_utf16<char32_t> base class implementation.
1382 // Converts from UTF-16 to UTF-32 (aka UCS-4).
1383 
~__codecvt_utf16_base()1384 __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
1385 
1386 codecvt_base::result
1387 __codecvt_utf16_base<char32_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1388 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1389        const intern_type*& __from_next,
1390        extern_type* __to, extern_type* __to_end,
1391        extern_type*& __to_next) const
1392 {
1393   range<const char32_t> from{ __from, __from_end };
1394   range<char16_t, false> to{ __to, __to_end };
1395   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1396   __from_next = from.next;
1397   __to_next = reinterpret_cast<char*>(to.next);
1398   return res;
1399 }
1400 
1401 codecvt_base::result
1402 __codecvt_utf16_base<char32_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1403 do_unshift(state_type&, extern_type* __to, extern_type*,
1404 	   extern_type*& __to_next) const
1405 {
1406   __to_next = __to;
1407   return noconv;
1408 }
1409 
1410 codecvt_base::result
1411 __codecvt_utf16_base<char32_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1412 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1413       const extern_type*& __from_next,
1414       intern_type* __to, intern_type* __to_end,
1415       intern_type*& __to_next) const
1416 {
1417   range<const char16_t, false> from{ __from, __from_end };
1418   range<char32_t> to{ __to, __to_end };
1419   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1420   __from_next = reinterpret_cast<const char*>(from.next);
1421   __to_next = to.next;
1422   if (res == codecvt_base::ok && __from_next != __from_end)
1423     res = codecvt_base::error;
1424   return res;
1425 }
1426 
1427 int
do_encoding() const1428 __codecvt_utf16_base<char32_t>::do_encoding() const throw()
1429 { return 0; } // UTF-16 is not a fixed-width encoding
1430 
1431 bool
do_always_noconv() const1432 __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
1433 { return false; }
1434 
1435 int
1436 __codecvt_utf16_base<char32_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1437 do_length(state_type&, const extern_type* __from,
1438 	  const extern_type* __end, size_t __max) const
1439 {
1440   range<const char16_t, false> from{ __from, __end };
1441   const char16_t* next = ucs4_span(from, __max, _M_maxcode, _M_mode);
1442   return reinterpret_cast<const char*>(next) - __from;
1443 }
1444 
1445 int
do_max_length() const1446 __codecvt_utf16_base<char32_t>::do_max_length() const throw()
1447 {
1448   // A single UCS-4 character requires one or two UTF-16 code units
1449   // (so up to four chars).
1450   int max = 4;
1451   if (_M_mode & consume_header)
1452     max += sizeof(utf16_bom);
1453   return max;
1454 }
1455 
1456 #ifdef _GLIBCXX_USE_WCHAR_T
1457 // Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1459 
~__codecvt_utf16_base()1460 __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
1461 
1462 codecvt_base::result
1463 __codecvt_utf16_base<wchar_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1464 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1465        const intern_type*& __from_next,
1466        extern_type* __to, extern_type* __to_end,
1467        extern_type*& __to_next) const
1468 {
1469   range<char16_t, false> to{ __to, __to_end };
1470 #if __SIZEOF_WCHAR_T__ == 2
1471   range<const char16_t> from{
1472     reinterpret_cast<const char16_t*>(__from),
1473     reinterpret_cast<const char16_t*>(__from_end),
1474   };
1475   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1476 #elif __SIZEOF_WCHAR_T__ == 4
1477   range<const char32_t> from{
1478     reinterpret_cast<const char32_t*>(__from),
1479     reinterpret_cast<const char32_t*>(__from_end),
1480   };
1481   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1482 #else
1483   return codecvt_base::error;
1484 #endif
1485   __from_next = reinterpret_cast<const wchar_t*>(from.next);
1486   __to_next = reinterpret_cast<char*>(to.next);
1487   return res;
1488 }
1489 
1490 codecvt_base::result
1491 __codecvt_utf16_base<wchar_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1492 do_unshift(state_type&, extern_type* __to, extern_type*,
1493 	   extern_type*& __to_next) const
1494 {
1495   __to_next = __to;
1496   return noconv;
1497 }
1498 
1499 codecvt_base::result
1500 __codecvt_utf16_base<wchar_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1501 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1502       const extern_type*& __from_next,
1503       intern_type* __to, intern_type* __to_end,
1504       intern_type*& __to_next) const
1505 {
1506   range<const char16_t, false> from{ __from, __from_end };
1507 #if __SIZEOF_WCHAR_T__ == 2
1508   range<char16_t> to{
1509     reinterpret_cast<char16_t*>(__to),
1510     reinterpret_cast<char16_t*>(__to_end),
1511   };
1512   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1513 #elif __SIZEOF_WCHAR_T__ == 4
1514   range<char32_t> to{
1515     reinterpret_cast<char32_t*>(__to),
1516     reinterpret_cast<char32_t*>(__to_end),
1517   };
1518   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1519 #else
1520   return codecvt_base::error;
1521 #endif
1522   __from_next = reinterpret_cast<const char*>(from.next);
1523   __to_next = reinterpret_cast<wchar_t*>(to.next);
1524   if (res == codecvt_base::ok && __from_next != __from_end)
1525     res = codecvt_base::error;
1526   return res;
1527 }
1528 
1529 int
do_encoding() const1530 __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
1531 { return 0; } // UTF-16 is not a fixed-width encoding
1532 
1533 bool
do_always_noconv() const1534 __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
1535 { return false; }
1536 
1537 int
1538 __codecvt_utf16_base<wchar_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1539 do_length(state_type&, const extern_type* __from,
1540 	  const extern_type* __end, size_t __max) const
1541 {
1542   range<const char16_t, false> from{ __from, __end };
1543 #if __SIZEOF_WCHAR_T__ == 2
1544   const char16_t* next = ucs2_span(from, __max, _M_maxcode, _M_mode);
1545 #elif __SIZEOF_WCHAR_T__ == 4
1546   const char16_t* next = ucs4_span(from, __max, _M_maxcode, _M_mode);
1547 #endif
1548   return reinterpret_cast<const char*>(next) - __from;
1549 }
1550 
1551 int
do_max_length() const1552 __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
1553 {
1554 #if __SIZEOF_WCHAR_T__ == 2
1555   int max = 2; // See __codecvt_utf16_base<char16_t>::do_max_length()
1556 #else
1557   int max = 4; // See __codecvt_utf16_base<char32_t>::do_max_length()
1558 #endif
1559   if (_M_mode & consume_header)
1560     max += sizeof(utf16_bom);
1561   return max;
1562 }
1563 #endif
1564 
1565 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1566 // Converts from UTF-8 to UTF-16.
1567 
~__codecvt_utf8_utf16_base()1568 __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
1569 
1570 codecvt_base::result
1571 __codecvt_utf8_utf16_base<char16_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1572 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1573        const intern_type*& __from_next,
1574        extern_type* __to, extern_type* __to_end,
1575        extern_type*& __to_next) const
1576 {
1577   range<const char16_t> from{ __from, __from_end };
1578   range<char> to{ __to, __to_end };
1579   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1580   __from_next = from.next;
1581   __to_next = to.next;
1582   return res;
1583 }
1584 
1585 codecvt_base::result
1586 __codecvt_utf8_utf16_base<char16_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1587 do_unshift(state_type&, extern_type* __to, extern_type*,
1588 	   extern_type*& __to_next) const
1589 {
1590   __to_next = __to;
1591   return noconv;
1592 }
1593 
1594 codecvt_base::result
1595 __codecvt_utf8_utf16_base<char16_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1596 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1597       const extern_type*& __from_next,
1598       intern_type* __to, intern_type* __to_end,
1599       intern_type*& __to_next) const
1600 {
1601   range<const char> from{ __from, __from_end };
1602   range<char16_t> to{ __to, __to_end };
1603   codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1604 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1605   mode = codecvt_mode(mode | little_endian);
1606 #endif
1607   auto res = utf16_in(from, to, _M_maxcode, mode);
1608   __from_next = from.next;
1609   __to_next = to.next;
1610   return res;
1611 }
1612 
1613 int
do_encoding() const1614 __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
1615 { return 0; } // UTF-8 is not a fixed-width encoding
1616 
1617 bool
do_always_noconv() const1618 __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
1619 { return false; }
1620 
1621 int
1622 __codecvt_utf8_utf16_base<char16_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1623 do_length(state_type&, const extern_type* __from,
1624 	  const extern_type* __end, size_t __max) const
1625 {
1626   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1627   return __end - __from;
1628 }
1629 
1630 int
do_max_length() const1631 __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
1632 {
1633   // A single character can be 1 or 2 UTF-16 code units,
1634   // requiring up to 4 UTF-8 code units.
1635   int max = 4;
1636   if (_M_mode & consume_header)
1637     max += sizeof(utf8_bom);
1638   return max;
1639 }
1640 
// Define members of codecvt_utf8_utf16<char32_t> base class implementation.
// Converts between UTF-8 (external) and UTF-16 code units stored in
// char32_t elements (internal).
1643 
~__codecvt_utf8_utf16_base()1644 __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
1645 
1646 codecvt_base::result
1647 __codecvt_utf8_utf16_base<char32_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1648 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1649        const intern_type*& __from_next,
1650        extern_type* __to, extern_type* __to_end,
1651        extern_type*& __to_next) const
1652 {
1653   range<const char32_t> from{ __from, __from_end };
1654   range<char> to{ __to, __to_end };
1655   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1656   __from_next = from.next;
1657   __to_next = to.next;
1658   return res;
1659 }
1660 
1661 codecvt_base::result
1662 __codecvt_utf8_utf16_base<char32_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1663 do_unshift(state_type&, extern_type* __to, extern_type*,
1664 	   extern_type*& __to_next) const
1665 {
1666   __to_next = __to;
1667   return noconv;
1668 }
1669 
1670 codecvt_base::result
1671 __codecvt_utf8_utf16_base<char32_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1672 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1673       const extern_type*& __from_next,
1674       intern_type* __to, intern_type* __to_end,
1675       intern_type*& __to_next) const
1676 {
1677   range<const char> from{ __from, __from_end };
1678   range<char32_t> to{ __to, __to_end };
1679   codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1680 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1681   mode = codecvt_mode(mode | little_endian);
1682 #endif
1683   auto res = utf16_in(from, to, _M_maxcode, mode);
1684   __from_next = from.next;
1685   __to_next = to.next;
1686   return res;
1687 }
1688 
1689 int
do_encoding() const1690 __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
1691 { return 0; } // UTF-8 is not a fixed-width encoding
1692 
1693 bool
do_always_noconv() const1694 __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
1695 { return false; }
1696 
1697 int
1698 __codecvt_utf8_utf16_base<char32_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1699 do_length(state_type&, const extern_type* __from,
1700 	  const extern_type* __end, size_t __max) const
1701 {
1702   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1703   return __end - __from;
1704 }
1705 
1706 int
do_max_length() const1707 __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
1708 {
1709   // A single character can be 1 or 2 UTF-16 code units,
1710   // requiring up to 4 UTF-8 code units.
1711   int max = 4;
1712   if (_M_mode & consume_header)
1713     max += sizeof(utf8_bom);
1714   return max;
1715 }
1716 
1717 #ifdef _GLIBCXX_USE_WCHAR_T
// Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
// Converts between UTF-8 (external) and UTF-16 code units stored in
// wchar_t elements (internal).
1720 
~__codecvt_utf8_utf16_base()1721 __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
1722 
1723 codecvt_base::result
1724 __codecvt_utf8_utf16_base<wchar_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const1725 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1726        const intern_type*& __from_next,
1727        extern_type* __to, extern_type* __to_end,
1728        extern_type*& __to_next) const
1729 {
1730   range<const wchar_t> from{ __from, __from_end };
1731   range<char> to{ __to, __to_end };
1732   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1733   __from_next = from.next;
1734   __to_next = to.next;
1735   return res;
1736 }
1737 
1738 codecvt_base::result
1739 __codecvt_utf8_utf16_base<wchar_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const1740 do_unshift(state_type&, extern_type* __to, extern_type*,
1741 	   extern_type*& __to_next) const
1742 {
1743   __to_next = __to;
1744   return noconv;
1745 }
1746 
1747 codecvt_base::result
1748 __codecvt_utf8_utf16_base<wchar_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const1749 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1750       const extern_type*& __from_next,
1751       intern_type* __to, intern_type* __to_end,
1752       intern_type*& __to_next) const
1753 {
1754   range<const char> from{ __from, __from_end };
1755   range<wchar_t> to{ __to, __to_end };
1756   codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1757 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1758   mode = codecvt_mode(mode | little_endian);
1759 #endif
1760   auto res = utf16_in(from, to, _M_maxcode, mode);
1761   __from_next = from.next;
1762   __to_next = to.next;
1763   return res;
1764 }
1765 
1766 int
do_encoding() const1767 __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
1768 { return 0; } // UTF-8 is not a fixed-width encoding
1769 
1770 bool
do_always_noconv() const1771 __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
1772 { return false; }
1773 
1774 int
1775 __codecvt_utf8_utf16_base<wchar_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const1776 do_length(state_type&, const extern_type* __from,
1777 	  const extern_type* __end, size_t __max) const
1778 {
1779   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1780   return __end - __from;
1781 }
1782 
1783 int
do_max_length() const1784 __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
1785 {
1786   // A single character can be 1 or 2 UTF-16 code units,
1787   // requiring up to 4 UTF-8 code units.
1788   int max = 4;
1789   if (_M_mode & consume_header)
1790     max += sizeof(utf8_bom);
1791   return max;
1792 }
1793 #endif
1794 
// Explicit instantiations for the facets whose external type is char.
// NOTE: "inline template" is a GNU extension; per the GCC manual it
// instantiates the compiler support data for the class (i.e. the vtable)
// without instantiating any of its members.
1795 inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
1796 inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
// Ordinary explicit instantiation definitions of codecvt_byname for the
// same internal/external/state type combinations.
1797 template class codecvt_byname<char16_t, char, mbstate_t>;
1798 template class codecvt_byname<char32_t, char, mbstate_t>;
1799 
// The same set of explicit instantiations with char8_t as the external
// type, emitted only when _GLIBCXX_USE_CHAR8_T is defined.
1800 #if defined(_GLIBCXX_USE_CHAR8_T)
// NOTE: "inline template" is a GNU extension; per the GCC manual it
// instantiates the compiler support data for the class (i.e. the vtable)
// without instantiating any of its members.
1801 inline template class __codecvt_abstract_base<char16_t, char8_t, mbstate_t>;
1802 inline template class __codecvt_abstract_base<char32_t, char8_t, mbstate_t>;
// Ordinary explicit instantiation definitions of codecvt_byname.
1803 template class codecvt_byname<char16_t, char8_t, mbstate_t>;
1804 template class codecvt_byname<char32_t, char8_t, mbstate_t>;
1805 #endif
1806 
1807 _GLIBCXX_END_NAMESPACE_VERSION
1808 }
1809