110d565efSmrg // Locale support (codecvt) -*- C++ -*-
210d565efSmrg
3*ec02198aSmrg // Copyright (C) 2015-2020 Free Software Foundation, Inc.
410d565efSmrg //
510d565efSmrg // This file is part of the GNU ISO C++ Library. This library is free
610d565efSmrg // software; you can redistribute it and/or modify it under the
710d565efSmrg // terms of the GNU General Public License as published by the
810d565efSmrg // Free Software Foundation; either version 3, or (at your option)
910d565efSmrg // any later version.
1010d565efSmrg
1110d565efSmrg // This library is distributed in the hope that it will be useful,
1210d565efSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
1310d565efSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1410d565efSmrg // GNU General Public License for more details.
1510d565efSmrg
1610d565efSmrg // Under Section 7 of GPL version 3, you are granted additional
1710d565efSmrg // permissions described in the GCC Runtime Library Exception, version
1810d565efSmrg // 3.1, as published by the Free Software Foundation.
1910d565efSmrg
2010d565efSmrg // You should have received a copy of the GNU General Public License and
2110d565efSmrg // a copy of the GCC Runtime Library Exception along with this program;
2210d565efSmrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
2310d565efSmrg // <http://www.gnu.org/licenses/>.
2410d565efSmrg
2510d565efSmrg #include <codecvt>
2610d565efSmrg #include <cstring> // std::memcpy, std::memcmp
2710d565efSmrg #include <bits/stl_algobase.h> // std::min
2810d565efSmrg
2910d565efSmrg namespace std _GLIBCXX_VISIBILITY(default)
3010d565efSmrg {
3110d565efSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
3210d565efSmrg
// codecvt_mode is used as a bitmask, but the standard defines no bitwise
// operators for it.  This helper exposes a mode's value as the enumeration's
// underlying integer type so that such operators can be written.
static underlying_type<codecvt_mode>::type
to_integer(codecvt_mode m)
{
  using int_type = underlying_type<codecvt_mode>::type;
  return static_cast<int_type>(m);
}
3710d565efSmrg
operator &=(codecvt_mode & m,codecvt_mode n)3810d565efSmrg static codecvt_mode& operator&=(codecvt_mode& m, codecvt_mode n)
3910d565efSmrg { return m = codecvt_mode(to_integer(m) & to_integer(n)); }
4010d565efSmrg
operator |=(codecvt_mode & m,codecvt_mode n)4110d565efSmrg static codecvt_mode& operator|=(codecvt_mode& m, codecvt_mode n)
4210d565efSmrg { return m = codecvt_mode(to_integer(m) | to_integer(n)); }
4310d565efSmrg
operator ~(codecvt_mode m)4410d565efSmrg static codecvt_mode operator~(codecvt_mode m)
4510d565efSmrg { return codecvt_mode(~to_integer(m)); }
4610d565efSmrg
4710d565efSmrg namespace
4810d565efSmrg {
// Largest code point that can be represented by one UTF-16 code unit.
const char32_t max_single_utf16_unit = 0xFFFF;

// Default upper bound for code points (U+10FFFF).
const char32_t max_code_point = 0x10FFFF;

// Sentinel values returned by the read_*_code_point functions.
// Those functions rely on maxcode < incomplete_mb_character
// (which is enforced by the codecvt_utf* classes on construction).
const char32_t incomplete_mb_character = static_cast<char32_t>(-2);
const char32_t invalid_mb_sequence = static_cast<char32_t>(-1);
5810d565efSmrg
// A cursor over the half-open sequence [next, end) of code units of type
// Elem.  It is used both to read code units from an input buffer and to
// write code units to an output buffer.  No bounds checking is done here;
// callers consult size() or nbytes() before reading or writing.
template<typename Elem, bool Aligned = true>
  struct range
  {
    Elem* next;
    Elem* end;

    // Store one code unit at the current position and step past it.
    range& operator=(Elem e)
    {
      *next = e;
      ++next;
      return *this;
    }

    // The code unit at the current position.
    Elem operator*() const
    { return next[0]; }

    // The code unit n positions after the current one.
    Elem operator[](size_t n) const
    { return *(next + n); }

    // Step forward by one code unit.
    range& operator++()
    {
      next = next + 1;
      return *this;
    }

    // Step forward by n code units.
    range& operator+=(size_t n)
    {
      next = next + n;
      return *this;
    }

    // How many code units are left before end.
    size_t size() const
    { return end - next; }

    // How many bytes are left before end.
    size_t nbytes() const
    {
      return reinterpret_cast<const char*>(end)
	- reinterpret_cast<const char*>(next);
    }
  };
10010d565efSmrg
10110d565efSmrg // This specialization is used when accessing char16_t values through
10210d565efSmrg // pointers to char, which might not be correctly aligned for char16_t.
10310d565efSmrg template<typename Elem>
10410d565efSmrg struct range<Elem, false>
10510d565efSmrg {
10610d565efSmrg using value_type = typename remove_const<Elem>::type;
10710d565efSmrg
10810d565efSmrg using char_pointer = typename
10910d565efSmrg conditional<is_const<Elem>::value, const char*, char*>::type;
11010d565efSmrg
11110d565efSmrg char_pointer next;
11210d565efSmrg char_pointer end;
11310d565efSmrg
11410d565efSmrg // Write a code unit.
operator =std::__anon47761b6e0111::range11510d565efSmrg range& operator=(Elem e)
11610d565efSmrg {
11710d565efSmrg memcpy(next, &e, sizeof(Elem));
11810d565efSmrg ++*this;
11910d565efSmrg return *this;
12010d565efSmrg }
12110d565efSmrg
12210d565efSmrg // Read the next code unit.
operator *std::__anon47761b6e0111::range12310d565efSmrg Elem operator*() const
12410d565efSmrg {
12510d565efSmrg value_type e;
12610d565efSmrg memcpy(&e, next, sizeof(Elem));
12710d565efSmrg return e;
12810d565efSmrg }
12910d565efSmrg
13010d565efSmrg // Read the Nth code unit.
operator []std::__anon47761b6e0111::range13110d565efSmrg Elem operator[](size_t n) const
13210d565efSmrg {
13310d565efSmrg value_type e;
13410d565efSmrg memcpy(&e, next + n * sizeof(Elem), sizeof(Elem));
13510d565efSmrg return e;
13610d565efSmrg }
13710d565efSmrg
13810d565efSmrg // Move to the next code unit.
operator ++std::__anon47761b6e0111::range13910d565efSmrg range& operator++()
14010d565efSmrg {
14110d565efSmrg next += sizeof(Elem);
14210d565efSmrg return *this;
14310d565efSmrg }
14410d565efSmrg
14510d565efSmrg // Move to the Nth code unit.
operator +=std::__anon47761b6e0111::range14610d565efSmrg range& operator+=(size_t n)
14710d565efSmrg {
14810d565efSmrg next += n * sizeof(Elem);
14910d565efSmrg return *this;
15010d565efSmrg }
15110d565efSmrg
15210d565efSmrg // The number of code units remaining.
sizestd::__anon47761b6e0111::range15310d565efSmrg size_t size() const { return nbytes() / sizeof(Elem); }
15410d565efSmrg
15510d565efSmrg // The number of bytes remaining.
nbytesstd::__anon47761b6e0111::range15610d565efSmrg size_t nbytes() const { return end - next; }
15710d565efSmrg };
15810d565efSmrg
// A multibyte sequence can start with a "header" consisting of a Byte Order
// Mark.  These are the byte patterns of the BOMs handled here.
const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF };
const unsigned char utf16_bom[] = { 0xFE, 0xFF };
const unsigned char utf16le_bom[] = { 0xFF, 0xFE };
16310d565efSmrg
16410d565efSmrg // Write a BOM (space permitting).
16510d565efSmrg template<typename C, bool A, size_t N>
16610d565efSmrg bool
write_bom(range<C,A> & to,const unsigned char (& bom)[N])16710d565efSmrg write_bom(range<C, A>& to, const unsigned char (&bom)[N])
16810d565efSmrg {
16910d565efSmrg static_assert( (N / sizeof(C)) != 0, "" );
17010d565efSmrg static_assert( (N % sizeof(C)) == 0, "" );
17110d565efSmrg
17210d565efSmrg if (to.nbytes() < N)
17310d565efSmrg return false;
17410d565efSmrg memcpy(to.next, bom, N);
17510d565efSmrg to += (N / sizeof(C));
17610d565efSmrg return true;
17710d565efSmrg }
17810d565efSmrg
17910d565efSmrg // Try to read a BOM.
18010d565efSmrg template<typename C, bool A, size_t N>
18110d565efSmrg bool
read_bom(range<C,A> & from,const unsigned char (& bom)[N])18210d565efSmrg read_bom(range<C, A>& from, const unsigned char (&bom)[N])
18310d565efSmrg {
18410d565efSmrg static_assert( (N / sizeof(C)) != 0, "" );
18510d565efSmrg static_assert( (N % sizeof(C)) == 0, "" );
18610d565efSmrg
18710d565efSmrg if (from.nbytes() >= N && !memcmp(from.next, bom, N))
18810d565efSmrg {
18910d565efSmrg from += (N / sizeof(C));
19010d565efSmrg return true;
19110d565efSmrg }
19210d565efSmrg return false;
19310d565efSmrg }
19410d565efSmrg
19510d565efSmrg // If generate_header is set in mode write out UTF-8 BOM.
1960fc04c29Smrg template<typename C>
19710d565efSmrg bool
write_utf8_bom(range<C> & to,codecvt_mode mode)1980fc04c29Smrg write_utf8_bom(range<C>& to, codecvt_mode mode)
19910d565efSmrg {
20010d565efSmrg if (mode & generate_header)
20110d565efSmrg return write_bom(to, utf8_bom);
20210d565efSmrg return true;
20310d565efSmrg }
20410d565efSmrg
20510d565efSmrg // If generate_header is set in mode write out the UTF-16 BOM indicated
20610d565efSmrg // by whether little_endian is set in mode.
20710d565efSmrg template<bool Aligned>
20810d565efSmrg bool
write_utf16_bom(range<char16_t,Aligned> & to,codecvt_mode mode)20910d565efSmrg write_utf16_bom(range<char16_t, Aligned>& to, codecvt_mode mode)
21010d565efSmrg {
21110d565efSmrg if (mode & generate_header)
21210d565efSmrg {
21310d565efSmrg if (mode & little_endian)
21410d565efSmrg return write_bom(to, utf16le_bom);
21510d565efSmrg else
21610d565efSmrg return write_bom(to, utf16_bom);
21710d565efSmrg }
21810d565efSmrg return true;
21910d565efSmrg }
22010d565efSmrg
22110d565efSmrg // If consume_header is set in mode update from.next to after any BOM.
2220fc04c29Smrg template<typename C>
22310d565efSmrg void
read_utf8_bom(range<const C> & from,codecvt_mode mode)2240fc04c29Smrg read_utf8_bom(range<const C>& from, codecvt_mode mode)
22510d565efSmrg {
22610d565efSmrg if (mode & consume_header)
22710d565efSmrg read_bom(from, utf8_bom);
22810d565efSmrg }
22910d565efSmrg
23010d565efSmrg // If consume_header is not set in mode, no effects.
23110d565efSmrg // Otherwise, if *from.next is a UTF-16 BOM increment from.next and then:
23210d565efSmrg // - if the UTF-16BE BOM was found unset little_endian in mode, or
23310d565efSmrg // - if the UTF-16LE BOM was found set little_endian in mode.
23410d565efSmrg template<bool Aligned>
23510d565efSmrg void
read_utf16_bom(range<const char16_t,Aligned> & from,codecvt_mode & mode)23610d565efSmrg read_utf16_bom(range<const char16_t, Aligned>& from, codecvt_mode& mode)
23710d565efSmrg {
23810d565efSmrg if (mode & consume_header)
23910d565efSmrg {
24010d565efSmrg if (read_bom(from, utf16_bom))
24110d565efSmrg mode &= ~little_endian;
24210d565efSmrg else if (read_bom(from, utf16le_bom))
24310d565efSmrg mode |= little_endian;
24410d565efSmrg }
24510d565efSmrg }
24610d565efSmrg
24710d565efSmrg // Read a codepoint from a UTF-8 multibyte sequence.
24810d565efSmrg // Updates from.next if the codepoint is not greater than maxcode.
24910d565efSmrg // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
2500fc04c29Smrg template<typename C>
25110d565efSmrg char32_t
read_utf8_code_point(range<const C> & from,unsigned long maxcode)2520fc04c29Smrg read_utf8_code_point(range<const C>& from, unsigned long maxcode)
25310d565efSmrg {
25410d565efSmrg const size_t avail = from.size();
25510d565efSmrg if (avail == 0)
25610d565efSmrg return incomplete_mb_character;
25710d565efSmrg unsigned char c1 = from[0];
25810d565efSmrg // https://en.wikipedia.org/wiki/UTF-8#Sample_code
25910d565efSmrg if (c1 < 0x80)
26010d565efSmrg {
26110d565efSmrg ++from;
26210d565efSmrg return c1;
26310d565efSmrg }
26410d565efSmrg else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
26510d565efSmrg return invalid_mb_sequence;
26610d565efSmrg else if (c1 < 0xE0) // 2-byte sequence
26710d565efSmrg {
26810d565efSmrg if (avail < 2)
26910d565efSmrg return incomplete_mb_character;
27010d565efSmrg unsigned char c2 = from[1];
27110d565efSmrg if ((c2 & 0xC0) != 0x80)
27210d565efSmrg return invalid_mb_sequence;
27310d565efSmrg char32_t c = (c1 << 6) + c2 - 0x3080;
27410d565efSmrg if (c <= maxcode)
27510d565efSmrg from += 2;
27610d565efSmrg return c;
27710d565efSmrg }
27810d565efSmrg else if (c1 < 0xF0) // 3-byte sequence
27910d565efSmrg {
28010d565efSmrg if (avail < 3)
28110d565efSmrg return incomplete_mb_character;
28210d565efSmrg unsigned char c2 = from[1];
28310d565efSmrg if ((c2 & 0xC0) != 0x80)
28410d565efSmrg return invalid_mb_sequence;
28510d565efSmrg if (c1 == 0xE0 && c2 < 0xA0) // overlong
28610d565efSmrg return invalid_mb_sequence;
28710d565efSmrg unsigned char c3 = from[2];
28810d565efSmrg if ((c3 & 0xC0) != 0x80)
28910d565efSmrg return invalid_mb_sequence;
29010d565efSmrg char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
29110d565efSmrg if (c <= maxcode)
29210d565efSmrg from += 3;
29310d565efSmrg return c;
29410d565efSmrg }
29510d565efSmrg else if (c1 < 0xF5) // 4-byte sequence
29610d565efSmrg {
29710d565efSmrg if (avail < 4)
29810d565efSmrg return incomplete_mb_character;
29910d565efSmrg unsigned char c2 = from[1];
30010d565efSmrg if ((c2 & 0xC0) != 0x80)
30110d565efSmrg return invalid_mb_sequence;
30210d565efSmrg if (c1 == 0xF0 && c2 < 0x90) // overlong
30310d565efSmrg return invalid_mb_sequence;
30410d565efSmrg if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
30510d565efSmrg return invalid_mb_sequence;
30610d565efSmrg unsigned char c3 = from[2];
30710d565efSmrg if ((c3 & 0xC0) != 0x80)
30810d565efSmrg return invalid_mb_sequence;
30910d565efSmrg unsigned char c4 = from[3];
31010d565efSmrg if ((c4 & 0xC0) != 0x80)
31110d565efSmrg return invalid_mb_sequence;
31210d565efSmrg char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
31310d565efSmrg if (c <= maxcode)
31410d565efSmrg from += 4;
31510d565efSmrg return c;
31610d565efSmrg }
31710d565efSmrg else // > U+10FFFF
31810d565efSmrg return invalid_mb_sequence;
31910d565efSmrg }
32010d565efSmrg
3210fc04c29Smrg template<typename C>
32210d565efSmrg bool
write_utf8_code_point(range<C> & to,char32_t code_point)3230fc04c29Smrg write_utf8_code_point(range<C>& to, char32_t code_point)
32410d565efSmrg {
32510d565efSmrg if (code_point < 0x80)
32610d565efSmrg {
32710d565efSmrg if (to.size() < 1)
32810d565efSmrg return false;
32910d565efSmrg to = code_point;
33010d565efSmrg }
33110d565efSmrg else if (code_point <= 0x7FF)
33210d565efSmrg {
33310d565efSmrg if (to.size() < 2)
33410d565efSmrg return false;
33510d565efSmrg to = (code_point >> 6) + 0xC0;
33610d565efSmrg to = (code_point & 0x3F) + 0x80;
33710d565efSmrg }
33810d565efSmrg else if (code_point <= 0xFFFF)
33910d565efSmrg {
34010d565efSmrg if (to.size() < 3)
34110d565efSmrg return false;
34210d565efSmrg to = (code_point >> 12) + 0xE0;
34310d565efSmrg to = ((code_point >> 6) & 0x3F) + 0x80;
34410d565efSmrg to = (code_point & 0x3F) + 0x80;
34510d565efSmrg }
34610d565efSmrg else if (code_point <= 0x10FFFF)
34710d565efSmrg {
34810d565efSmrg if (to.size() < 4)
34910d565efSmrg return false;
35010d565efSmrg to = (code_point >> 18) + 0xF0;
35110d565efSmrg to = ((code_point >> 12) & 0x3F) + 0x80;
35210d565efSmrg to = ((code_point >> 6) & 0x3F) + 0x80;
35310d565efSmrg to = (code_point & 0x3F) + 0x80;
35410d565efSmrg }
35510d565efSmrg else
35610d565efSmrg return false;
35710d565efSmrg return true;
35810d565efSmrg }
35910d565efSmrg
// Convert a UTF-16 code unit between the host's byte order and the byte
// order selected by mode: the unit is returned unchanged when the
// little_endian bit of mode matches the host's endianness, and byte-swapped
// otherwise.
inline char16_t
adjust_byte_order(char16_t c, codecvt_mode mode)
{
  const bool want_little_endian = (mode & little_endian) != 0;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  const bool host_little_endian = false;
#else
  const bool host_little_endian = true;
#endif
  if (want_little_endian == host_little_endian)
    return c;
  return __builtin_bswap16(c);
}
36910d565efSmrg
// True if c is a high-surrogate (aka leading) code point,
// i.e. lies in [U+D800, U+DBFF].
inline bool
is_high_surrogate(char32_t c)
{
  return 0xD800 <= c && c < 0xDC00;
}
37610d565efSmrg
// True if c is a low-surrogate (aka trailing) code point,
// i.e. lies in [U+DC00, U+DFFF].
inline bool
is_low_surrogate(char32_t c)
{
  return 0xDC00 <= c && c < 0xE000;
}
38310d565efSmrg
// Combine a high and a low surrogate into the code point they encode.
// The arguments are not checked; callers verify them with
// is_high_surrogate and is_low_surrogate first.
inline char32_t
surrogate_pair_to_code_point(char32_t high, char32_t low)
{
  // (0xD800 << 10) + 0xDC00 - 0x10000, folded into a single constant.
  const char32_t surrogate_offset = 0x35FDC00;
  return (high << 10) + low - surrogate_offset;
}
38910d565efSmrg
39010d565efSmrg // Read a codepoint from a UTF-16 multibyte sequence.
39110d565efSmrg // The sequence's endianness is indicated by (mode & little_endian).
39210d565efSmrg // Updates from.next if the codepoint is not greater than maxcode.
39310d565efSmrg // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
39410d565efSmrg template<bool Aligned>
39510d565efSmrg char32_t
read_utf16_code_point(range<const char16_t,Aligned> & from,unsigned long maxcode,codecvt_mode mode)39610d565efSmrg read_utf16_code_point(range<const char16_t, Aligned>& from,
39710d565efSmrg unsigned long maxcode, codecvt_mode mode)
39810d565efSmrg {
39910d565efSmrg const size_t avail = from.size();
40010d565efSmrg if (avail == 0)
40110d565efSmrg return incomplete_mb_character;
40210d565efSmrg int inc = 1;
40310d565efSmrg char32_t c = adjust_byte_order(from[0], mode);
40410d565efSmrg if (is_high_surrogate(c))
40510d565efSmrg {
40610d565efSmrg if (avail < 2)
40710d565efSmrg return incomplete_mb_character;
40810d565efSmrg const char16_t c2 = adjust_byte_order(from[1], mode);
40910d565efSmrg if (is_low_surrogate(c2))
41010d565efSmrg {
41110d565efSmrg c = surrogate_pair_to_code_point(c, c2);
41210d565efSmrg inc = 2;
41310d565efSmrg }
41410d565efSmrg else
41510d565efSmrg return invalid_mb_sequence;
41610d565efSmrg }
41710d565efSmrg else if (is_low_surrogate(c))
41810d565efSmrg return invalid_mb_sequence;
41910d565efSmrg if (c <= maxcode)
42010d565efSmrg from += inc;
42110d565efSmrg return c;
42210d565efSmrg }
42310d565efSmrg
42410d565efSmrg template<typename C, bool A>
42510d565efSmrg bool
write_utf16_code_point(range<C,A> & to,char32_t codepoint,codecvt_mode mode)42610d565efSmrg write_utf16_code_point(range<C, A>& to, char32_t codepoint, codecvt_mode mode)
42710d565efSmrg {
42810d565efSmrg static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
42910d565efSmrg
43010d565efSmrg if (codepoint <= max_single_utf16_unit)
43110d565efSmrg {
43210d565efSmrg if (to.size() > 0)
43310d565efSmrg {
43410d565efSmrg to = adjust_byte_order(codepoint, mode);
43510d565efSmrg return true;
43610d565efSmrg }
43710d565efSmrg }
43810d565efSmrg else if (to.size() > 1)
43910d565efSmrg {
44010d565efSmrg // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
44110d565efSmrg const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
44210d565efSmrg char16_t lead = LEAD_OFFSET + (codepoint >> 10);
44310d565efSmrg char16_t trail = 0xDC00 + (codepoint & 0x3FF);
44410d565efSmrg to = adjust_byte_order(lead, mode);
44510d565efSmrg to = adjust_byte_order(trail, mode);
44610d565efSmrg return true;
44710d565efSmrg }
44810d565efSmrg return false;
44910d565efSmrg }
45010d565efSmrg
45110d565efSmrg // utf8 -> ucs4
4520fc04c29Smrg template<typename C>
45310d565efSmrg codecvt_base::result
ucs4_in(range<const C> & from,range<char32_t> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={})4540fc04c29Smrg ucs4_in(range<const C>& from, range<char32_t>& to,
45510d565efSmrg unsigned long maxcode = max_code_point, codecvt_mode mode = {})
45610d565efSmrg {
45710d565efSmrg read_utf8_bom(from, mode);
45810d565efSmrg while (from.size() && to.size())
45910d565efSmrg {
46010d565efSmrg const char32_t codepoint = read_utf8_code_point(from, maxcode);
46110d565efSmrg if (codepoint == incomplete_mb_character)
46210d565efSmrg return codecvt_base::partial;
46310d565efSmrg if (codepoint > maxcode)
46410d565efSmrg return codecvt_base::error;
46510d565efSmrg to = codepoint;
46610d565efSmrg }
46710d565efSmrg return from.size() ? codecvt_base::partial : codecvt_base::ok;
46810d565efSmrg }
46910d565efSmrg
47010d565efSmrg // ucs4 -> utf8
4710fc04c29Smrg template<typename C>
47210d565efSmrg codecvt_base::result
ucs4_out(range<const char32_t> & from,range<C> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={})4730fc04c29Smrg ucs4_out(range<const char32_t>& from, range<C>& to,
47410d565efSmrg unsigned long maxcode = max_code_point, codecvt_mode mode = {})
47510d565efSmrg {
47610d565efSmrg if (!write_utf8_bom(to, mode))
47710d565efSmrg return codecvt_base::partial;
47810d565efSmrg while (from.size())
47910d565efSmrg {
48010d565efSmrg const char32_t c = from[0];
48110d565efSmrg if (c > maxcode)
48210d565efSmrg return codecvt_base::error;
48310d565efSmrg if (!write_utf8_code_point(to, c))
48410d565efSmrg return codecvt_base::partial;
48510d565efSmrg ++from;
48610d565efSmrg }
48710d565efSmrg return codecvt_base::ok;
48810d565efSmrg }
48910d565efSmrg
49010d565efSmrg // utf16 -> ucs4
49110d565efSmrg codecvt_base::result
ucs4_in(range<const char16_t,false> & from,range<char32_t> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={})49210d565efSmrg ucs4_in(range<const char16_t, false>& from, range<char32_t>& to,
49310d565efSmrg unsigned long maxcode = max_code_point, codecvt_mode mode = {})
49410d565efSmrg {
49510d565efSmrg read_utf16_bom(from, mode);
49610d565efSmrg while (from.size() && to.size())
49710d565efSmrg {
49810d565efSmrg const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
49910d565efSmrg if (codepoint == incomplete_mb_character)
50010d565efSmrg return codecvt_base::partial;
50110d565efSmrg if (codepoint > maxcode)
50210d565efSmrg return codecvt_base::error;
50310d565efSmrg to = codepoint;
50410d565efSmrg }
50510d565efSmrg return from.size() ? codecvt_base::partial : codecvt_base::ok;
50610d565efSmrg }
50710d565efSmrg
50810d565efSmrg // ucs4 -> utf16
50910d565efSmrg codecvt_base::result
ucs4_out(range<const char32_t> & from,range<char16_t,false> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={})51010d565efSmrg ucs4_out(range<const char32_t>& from, range<char16_t, false>& to,
51110d565efSmrg unsigned long maxcode = max_code_point, codecvt_mode mode = {})
51210d565efSmrg {
51310d565efSmrg if (!write_utf16_bom(to, mode))
51410d565efSmrg return codecvt_base::partial;
51510d565efSmrg while (from.size())
51610d565efSmrg {
51710d565efSmrg const char32_t c = from[0];
51810d565efSmrg if (c > maxcode)
51910d565efSmrg return codecvt_base::error;
52010d565efSmrg if (!write_utf16_code_point(to, c, mode))
52110d565efSmrg return codecvt_base::partial;
52210d565efSmrg ++from;
52310d565efSmrg }
52410d565efSmrg return codecvt_base::ok;
52510d565efSmrg }
52610d565efSmrg
// Flag indicating whether to process UTF-16 (surrogate pairs allowed)
// or UCS-2 (surrogates disallowed).
enum class surrogates
{
  allowed,
  disallowed
};
52910d565efSmrg
53010d565efSmrg // utf8 -> utf16 (or utf8 -> ucs2 if s == surrogates::disallowed)
5310fc04c29Smrg template<typename C8, typename C16>
53210d565efSmrg codecvt_base::result
utf16_in(range<const C8> & from,range<C16> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={},surrogates s=surrogates::allowed)5330fc04c29Smrg utf16_in(range<const C8>& from, range<C16>& to,
53410d565efSmrg unsigned long maxcode = max_code_point, codecvt_mode mode = {},
53510d565efSmrg surrogates s = surrogates::allowed)
53610d565efSmrg {
53710d565efSmrg read_utf8_bom(from, mode);
53810d565efSmrg while (from.size() && to.size())
53910d565efSmrg {
54010d565efSmrg auto orig = from;
54110d565efSmrg const char32_t codepoint = read_utf8_code_point(from, maxcode);
54210d565efSmrg if (codepoint == incomplete_mb_character)
54310d565efSmrg {
54410d565efSmrg if (s == surrogates::allowed)
54510d565efSmrg return codecvt_base::partial;
54610d565efSmrg else
54710d565efSmrg return codecvt_base::error; // No surrogates in UCS2
54810d565efSmrg }
54910d565efSmrg if (codepoint > maxcode)
55010d565efSmrg return codecvt_base::error;
55110d565efSmrg if (!write_utf16_code_point(to, codepoint, mode))
55210d565efSmrg {
55310d565efSmrg from = orig; // rewind to previous position
55410d565efSmrg return codecvt_base::partial;
55510d565efSmrg }
55610d565efSmrg }
55710d565efSmrg return codecvt_base::ok;
55810d565efSmrg }
55910d565efSmrg
56010d565efSmrg // utf16 -> utf8 (or ucs2 -> utf8 if s == surrogates::disallowed)
5610fc04c29Smrg template<typename C16, typename C8>
56210d565efSmrg codecvt_base::result
utf16_out(range<const C16> & from,range<C8> & to,unsigned long maxcode=max_code_point,codecvt_mode mode={},surrogates s=surrogates::allowed)5630fc04c29Smrg utf16_out(range<const C16>& from, range<C8>& to,
56410d565efSmrg unsigned long maxcode = max_code_point, codecvt_mode mode = {},
56510d565efSmrg surrogates s = surrogates::allowed)
56610d565efSmrg {
56710d565efSmrg if (!write_utf8_bom(to, mode))
56810d565efSmrg return codecvt_base::partial;
56910d565efSmrg while (from.size())
57010d565efSmrg {
57110d565efSmrg char32_t c = from[0];
57210d565efSmrg int inc = 1;
57310d565efSmrg if (is_high_surrogate(c))
57410d565efSmrg {
57510d565efSmrg if (s == surrogates::disallowed)
57610d565efSmrg return codecvt_base::error; // No surrogates in UCS-2
57710d565efSmrg
57810d565efSmrg if (from.size() < 2)
57910d565efSmrg return codecvt_base::ok; // stop converting at this point
58010d565efSmrg
58110d565efSmrg const char32_t c2 = from[1];
58210d565efSmrg if (is_low_surrogate(c2))
58310d565efSmrg {
58410d565efSmrg c = surrogate_pair_to_code_point(c, c2);
58510d565efSmrg inc = 2;
58610d565efSmrg }
58710d565efSmrg else
58810d565efSmrg return codecvt_base::error;
58910d565efSmrg }
59010d565efSmrg else if (is_low_surrogate(c))
59110d565efSmrg return codecvt_base::error;
59210d565efSmrg if (c > maxcode)
59310d565efSmrg return codecvt_base::error;
59410d565efSmrg if (!write_utf8_code_point(to, c))
59510d565efSmrg return codecvt_base::partial;
59610d565efSmrg from += inc;
59710d565efSmrg }
59810d565efSmrg return codecvt_base::ok;
59910d565efSmrg }
60010d565efSmrg
60110d565efSmrg // return pos such that [begin,pos) is valid UTF-16 string no longer than max
6020fc04c29Smrg template<typename C>
6030fc04c29Smrg const C*
utf16_span(const C * begin,const C * end,size_t max,char32_t maxcode=max_code_point,codecvt_mode mode={})6040fc04c29Smrg utf16_span(const C* begin, const C* end, size_t max,
60510d565efSmrg char32_t maxcode = max_code_point, codecvt_mode mode = {})
60610d565efSmrg {
6070fc04c29Smrg range<const C> from{ begin, end };
60810d565efSmrg read_utf8_bom(from, mode);
60910d565efSmrg size_t count = 0;
61010d565efSmrg while (count+1 < max)
61110d565efSmrg {
61210d565efSmrg char32_t c = read_utf8_code_point(from, maxcode);
61310d565efSmrg if (c > maxcode)
61410d565efSmrg return from.next;
61510d565efSmrg else if (c > max_single_utf16_unit)
61610d565efSmrg ++count;
61710d565efSmrg ++count;
61810d565efSmrg }
61910d565efSmrg if (count+1 == max) // take one more character if it fits in a single unit
62010d565efSmrg read_utf8_code_point(from, std::min(max_single_utf16_unit, maxcode));
62110d565efSmrg return from.next;
62210d565efSmrg }
62310d565efSmrg
62410d565efSmrg // utf8 -> ucs2
6250fc04c29Smrg template<typename C>
62610d565efSmrg codecvt_base::result
ucs2_in(range<const C> & from,range<char16_t> & to,char32_t maxcode=max_code_point,codecvt_mode mode={})6270fc04c29Smrg ucs2_in(range<const C>& from, range<char16_t>& to,
62810d565efSmrg char32_t maxcode = max_code_point, codecvt_mode mode = {})
62910d565efSmrg {
63010d565efSmrg // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
63110d565efSmrg maxcode = std::min(max_single_utf16_unit, maxcode);
63210d565efSmrg return utf16_in(from, to, maxcode, mode, surrogates::disallowed);
63310d565efSmrg }
63410d565efSmrg
63510d565efSmrg // ucs2 -> utf8
6360fc04c29Smrg template<typename C>
63710d565efSmrg codecvt_base::result
ucs2_out(range<const char16_t> & from,range<C> & to,char32_t maxcode=max_code_point,codecvt_mode mode={})6380fc04c29Smrg ucs2_out(range<const char16_t>& from, range<C>& to,
63910d565efSmrg char32_t maxcode = max_code_point, codecvt_mode mode = {})
64010d565efSmrg {
64110d565efSmrg // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
64210d565efSmrg maxcode = std::min(max_single_utf16_unit, maxcode);
64310d565efSmrg return utf16_out(from, to, maxcode, mode, surrogates::disallowed);
64410d565efSmrg }
64510d565efSmrg
64610d565efSmrg // ucs2 -> utf16
64710d565efSmrg codecvt_base::result
ucs2_out(range<const char16_t> & from,range<char16_t,false> & to,char32_t maxcode=max_code_point,codecvt_mode mode={})64810d565efSmrg ucs2_out(range<const char16_t>& from, range<char16_t, false>& to,
64910d565efSmrg char32_t maxcode = max_code_point, codecvt_mode mode = {})
65010d565efSmrg {
65110d565efSmrg if (!write_utf16_bom(to, mode))
65210d565efSmrg return codecvt_base::partial;
65310d565efSmrg while (from.size() && to.size())
65410d565efSmrg {
65510d565efSmrg char16_t c = from[0];
65610d565efSmrg if (is_high_surrogate(c))
65710d565efSmrg return codecvt_base::error;
65810d565efSmrg if (c > maxcode)
65910d565efSmrg return codecvt_base::error;
66010d565efSmrg to = adjust_byte_order(c, mode);
66110d565efSmrg ++from;
66210d565efSmrg }
66310d565efSmrg return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
66410d565efSmrg }
66510d565efSmrg
66610d565efSmrg // utf16 -> ucs2
66710d565efSmrg codecvt_base::result
ucs2_in(range<const char16_t,false> & from,range<char16_t> & to,char32_t maxcode=max_code_point,codecvt_mode mode={})66810d565efSmrg ucs2_in(range<const char16_t, false>& from, range<char16_t>& to,
66910d565efSmrg char32_t maxcode = max_code_point, codecvt_mode mode = {})
67010d565efSmrg {
67110d565efSmrg read_utf16_bom(from, mode);
67210d565efSmrg // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
67310d565efSmrg maxcode = std::min(max_single_utf16_unit, maxcode);
67410d565efSmrg while (from.size() && to.size())
67510d565efSmrg {
67610d565efSmrg const char32_t c = read_utf16_code_point(from, maxcode, mode);
67710d565efSmrg if (c == incomplete_mb_character)
67810d565efSmrg return codecvt_base::error; // UCS-2 only supports single units.
67910d565efSmrg if (c > maxcode)
68010d565efSmrg return codecvt_base::error;
68110d565efSmrg to = c;
68210d565efSmrg }
68310d565efSmrg return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
68410d565efSmrg }
68510d565efSmrg
68610d565efSmrg const char16_t*
ucs2_span(range<const char16_t,false> & from,size_t max,char32_t maxcode,codecvt_mode mode)68710d565efSmrg ucs2_span(range<const char16_t, false>& from, size_t max,
68810d565efSmrg char32_t maxcode, codecvt_mode mode)
68910d565efSmrg {
69010d565efSmrg read_utf16_bom(from, mode);
69110d565efSmrg // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
69210d565efSmrg maxcode = std::min(max_single_utf16_unit, maxcode);
69310d565efSmrg char32_t c = 0;
69410d565efSmrg while (max-- && c <= maxcode)
69510d565efSmrg c = read_utf16_code_point(from, maxcode, mode);
69610d565efSmrg return reinterpret_cast<const char16_t*>(from.next);
69710d565efSmrg }
69810d565efSmrg
6990fc04c29Smrg template<typename C>
7000fc04c29Smrg const C*
ucs2_span(const C * begin,const C * end,size_t max,char32_t maxcode,codecvt_mode mode)7010fc04c29Smrg ucs2_span(const C* begin, const C* end, size_t max,
70210d565efSmrg char32_t maxcode, codecvt_mode mode)
70310d565efSmrg {
7040fc04c29Smrg range<const C> from{ begin, end };
70510d565efSmrg read_utf8_bom(from, mode);
70610d565efSmrg // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
70710d565efSmrg maxcode = std::min(max_single_utf16_unit, maxcode);
70810d565efSmrg char32_t c = 0;
70910d565efSmrg while (max-- && c <= maxcode)
71010d565efSmrg c = read_utf8_code_point(from, maxcode);
71110d565efSmrg return from.next;
71210d565efSmrg }
71310d565efSmrg
71410d565efSmrg // return pos such that [begin,pos) is valid UCS-4 string no longer than max
7150fc04c29Smrg template<typename C>
7160fc04c29Smrg const C*
ucs4_span(const C * begin,const C * end,size_t max,char32_t maxcode=max_code_point,codecvt_mode mode={})7170fc04c29Smrg ucs4_span(const C* begin, const C* end, size_t max,
71810d565efSmrg char32_t maxcode = max_code_point, codecvt_mode mode = {})
71910d565efSmrg {
7200fc04c29Smrg range<const C> from{ begin, end };
72110d565efSmrg read_utf8_bom(from, mode);
72210d565efSmrg char32_t c = 0;
72310d565efSmrg while (max-- && c <= maxcode)
72410d565efSmrg c = read_utf8_code_point(from, maxcode);
72510d565efSmrg return from.next;
72610d565efSmrg }
72710d565efSmrg
72810d565efSmrg // return pos such that [begin,pos) is valid UCS-4 string no longer than max
72910d565efSmrg const char16_t*
ucs4_span(range<const char16_t,false> & from,size_t max,char32_t maxcode=max_code_point,codecvt_mode mode={})73010d565efSmrg ucs4_span(range<const char16_t, false>& from, size_t max,
73110d565efSmrg char32_t maxcode = max_code_point, codecvt_mode mode = {})
73210d565efSmrg {
73310d565efSmrg read_utf16_bom(from, mode);
73410d565efSmrg char32_t c = 0;
73510d565efSmrg while (max-- && c <= maxcode)
73610d565efSmrg c = read_utf16_code_point(from, maxcode, mode);
73710d565efSmrg return reinterpret_cast<const char16_t*>(from.next);
73810d565efSmrg }
73910d565efSmrg }
74010d565efSmrg
74110d565efSmrg // Define members of codecvt<char16_t, char, mbstate_t> specialization.
74210d565efSmrg // Converts from UTF-8 to UTF-16.
74310d565efSmrg
74410d565efSmrg locale::id codecvt<char16_t, char, mbstate_t>::id;
74510d565efSmrg
~codecvt()74610d565efSmrg codecvt<char16_t, char, mbstate_t>::~codecvt() { }
74710d565efSmrg
74810d565efSmrg codecvt_base::result
74910d565efSmrg codecvt<char16_t, char, mbstate_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const75010d565efSmrg do_out(state_type&,
75110d565efSmrg const intern_type* __from,
75210d565efSmrg const intern_type* __from_end, const intern_type*& __from_next,
75310d565efSmrg extern_type* __to, extern_type* __to_end,
75410d565efSmrg extern_type*& __to_next) const
75510d565efSmrg {
75610d565efSmrg range<const char16_t> from{ __from, __from_end };
75710d565efSmrg range<char> to{ __to, __to_end };
75810d565efSmrg auto res = utf16_out(from, to);
75910d565efSmrg __from_next = from.next;
76010d565efSmrg __to_next = to.next;
76110d565efSmrg return res;
76210d565efSmrg }
76310d565efSmrg
76410d565efSmrg codecvt_base::result
76510d565efSmrg codecvt<char16_t, char, mbstate_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const76610d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
76710d565efSmrg extern_type*& __to_next) const
76810d565efSmrg {
76910d565efSmrg __to_next = __to;
77010d565efSmrg return noconv; // we don't use mbstate_t for the unicode facets
77110d565efSmrg }
77210d565efSmrg
77310d565efSmrg codecvt_base::result
77410d565efSmrg codecvt<char16_t, char, mbstate_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const77510d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
77610d565efSmrg const extern_type*& __from_next,
77710d565efSmrg intern_type* __to, intern_type* __to_end,
77810d565efSmrg intern_type*& __to_next) const
77910d565efSmrg {
78010d565efSmrg range<const char> from{ __from, __from_end };
78110d565efSmrg range<char16_t> to{ __to, __to_end };
78210d565efSmrg #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
78310d565efSmrg codecvt_mode mode = {};
78410d565efSmrg #else
78510d565efSmrg codecvt_mode mode = little_endian;
78610d565efSmrg #endif
78710d565efSmrg auto res = utf16_in(from, to, max_code_point, mode);
78810d565efSmrg __from_next = from.next;
78910d565efSmrg __to_next = to.next;
79010d565efSmrg return res;
79110d565efSmrg }
79210d565efSmrg
79310d565efSmrg int
do_encoding() const79410d565efSmrg codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
79510d565efSmrg { return 0; } // UTF-8 is not a fixed-width encoding
79610d565efSmrg
79710d565efSmrg bool
do_always_noconv() const79810d565efSmrg codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
79910d565efSmrg { return false; }
80010d565efSmrg
80110d565efSmrg int
80210d565efSmrg codecvt<char16_t, char, mbstate_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const80310d565efSmrg do_length(state_type&, const extern_type* __from,
80410d565efSmrg const extern_type* __end, size_t __max) const
80510d565efSmrg {
80610d565efSmrg __end = utf16_span(__from, __end, __max);
80710d565efSmrg return __end - __from;
80810d565efSmrg }
80910d565efSmrg
81010d565efSmrg int
do_max_length() const81110d565efSmrg codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
81210d565efSmrg {
81310d565efSmrg // A single character (one or two UTF-16 code units) requires
81410d565efSmrg // up to four UTF-8 code units.
81510d565efSmrg return 4;
81610d565efSmrg }
81710d565efSmrg
81810d565efSmrg // Define members of codecvt<char32_t, char, mbstate_t> specialization.
81910d565efSmrg // Converts from UTF-8 to UTF-32 (aka UCS-4).
82010d565efSmrg
82110d565efSmrg locale::id codecvt<char32_t, char, mbstate_t>::id;
82210d565efSmrg
~codecvt()82310d565efSmrg codecvt<char32_t, char, mbstate_t>::~codecvt() { }
82410d565efSmrg
82510d565efSmrg codecvt_base::result
82610d565efSmrg codecvt<char32_t, char, mbstate_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const82710d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
82810d565efSmrg const intern_type*& __from_next,
82910d565efSmrg extern_type* __to, extern_type* __to_end,
83010d565efSmrg extern_type*& __to_next) const
83110d565efSmrg {
83210d565efSmrg range<const char32_t> from{ __from, __from_end };
83310d565efSmrg range<char> to{ __to, __to_end };
83410d565efSmrg auto res = ucs4_out(from, to);
83510d565efSmrg __from_next = from.next;
83610d565efSmrg __to_next = to.next;
83710d565efSmrg return res;
83810d565efSmrg }
83910d565efSmrg
84010d565efSmrg codecvt_base::result
84110d565efSmrg codecvt<char32_t, char, mbstate_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const84210d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
84310d565efSmrg extern_type*& __to_next) const
84410d565efSmrg {
84510d565efSmrg __to_next = __to;
84610d565efSmrg return noconv;
84710d565efSmrg }
84810d565efSmrg
84910d565efSmrg codecvt_base::result
85010d565efSmrg codecvt<char32_t, char, mbstate_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const85110d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
85210d565efSmrg const extern_type*& __from_next,
85310d565efSmrg intern_type* __to, intern_type* __to_end,
85410d565efSmrg intern_type*& __to_next) const
85510d565efSmrg {
85610d565efSmrg range<const char> from{ __from, __from_end };
85710d565efSmrg range<char32_t> to{ __to, __to_end };
85810d565efSmrg auto res = ucs4_in(from, to);
85910d565efSmrg __from_next = from.next;
86010d565efSmrg __to_next = to.next;
86110d565efSmrg return res;
86210d565efSmrg }
86310d565efSmrg
86410d565efSmrg int
do_encoding() const86510d565efSmrg codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
86610d565efSmrg { return 0; } // UTF-8 is not a fixed-width encoding
86710d565efSmrg
86810d565efSmrg bool
do_always_noconv() const86910d565efSmrg codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
87010d565efSmrg { return false; }
87110d565efSmrg
87210d565efSmrg int
87310d565efSmrg codecvt<char32_t, char, mbstate_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const87410d565efSmrg do_length(state_type&, const extern_type* __from,
87510d565efSmrg const extern_type* __end, size_t __max) const
87610d565efSmrg {
87710d565efSmrg __end = ucs4_span(__from, __end, __max);
87810d565efSmrg return __end - __from;
87910d565efSmrg }
88010d565efSmrg
88110d565efSmrg int
do_max_length() const88210d565efSmrg codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
88310d565efSmrg {
88410d565efSmrg // A single character (one UTF-32 code unit) requires
88510d565efSmrg // up to 4 UTF-8 code units.
88610d565efSmrg return 4;
88710d565efSmrg }
88810d565efSmrg
8890fc04c29Smrg #if defined(_GLIBCXX_USE_CHAR8_T)
8900fc04c29Smrg // Define members of codecvt<char16_t, char8_t, mbstate_t> specialization.
8910fc04c29Smrg // Converts from UTF-8 to UTF-16.
8920fc04c29Smrg
8930fc04c29Smrg locale::id codecvt<char16_t, char8_t, mbstate_t>::id;
8940fc04c29Smrg
~codecvt()8950fc04c29Smrg codecvt<char16_t, char8_t, mbstate_t>::~codecvt() { }
8960fc04c29Smrg
8970fc04c29Smrg codecvt_base::result
8980fc04c29Smrg codecvt<char16_t, char8_t, mbstate_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const8990fc04c29Smrg do_out(state_type&,
9000fc04c29Smrg const intern_type* __from,
9010fc04c29Smrg const intern_type* __from_end, const intern_type*& __from_next,
9020fc04c29Smrg extern_type* __to, extern_type* __to_end,
9030fc04c29Smrg extern_type*& __to_next) const
9040fc04c29Smrg {
9050fc04c29Smrg range<const char16_t> from{ __from, __from_end };
9060fc04c29Smrg range<char8_t> to{ __to, __to_end };
9070fc04c29Smrg auto res = utf16_out(from, to);
9080fc04c29Smrg __from_next = from.next;
9090fc04c29Smrg __to_next = to.next;
9100fc04c29Smrg return res;
9110fc04c29Smrg }
9120fc04c29Smrg
9130fc04c29Smrg codecvt_base::result
9140fc04c29Smrg codecvt<char16_t, char8_t, mbstate_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const9150fc04c29Smrg do_unshift(state_type&, extern_type* __to, extern_type*,
9160fc04c29Smrg extern_type*& __to_next) const
9170fc04c29Smrg {
9180fc04c29Smrg __to_next = __to;
9190fc04c29Smrg return noconv; // we don't use mbstate_t for the unicode facets
9200fc04c29Smrg }
9210fc04c29Smrg
9220fc04c29Smrg codecvt_base::result
9230fc04c29Smrg codecvt<char16_t, char8_t, mbstate_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const9240fc04c29Smrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
9250fc04c29Smrg const extern_type*& __from_next,
9260fc04c29Smrg intern_type* __to, intern_type* __to_end,
9270fc04c29Smrg intern_type*& __to_next) const
9280fc04c29Smrg {
9290fc04c29Smrg range<const char8_t> from{ __from, __from_end };
9300fc04c29Smrg range<char16_t> to{ __to, __to_end };
9310fc04c29Smrg #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
9320fc04c29Smrg codecvt_mode mode = {};
9330fc04c29Smrg #else
9340fc04c29Smrg codecvt_mode mode = little_endian;
9350fc04c29Smrg #endif
9360fc04c29Smrg auto res = utf16_in(from, to, max_code_point, mode);
9370fc04c29Smrg __from_next = from.next;
9380fc04c29Smrg __to_next = to.next;
9390fc04c29Smrg return res;
9400fc04c29Smrg }
9410fc04c29Smrg
9420fc04c29Smrg int
do_encoding() const9430fc04c29Smrg codecvt<char16_t, char8_t, mbstate_t>::do_encoding() const throw()
9440fc04c29Smrg { return 0; } // UTF-8 is not a fixed-width encoding
9450fc04c29Smrg
9460fc04c29Smrg bool
do_always_noconv() const9470fc04c29Smrg codecvt<char16_t, char8_t, mbstate_t>::do_always_noconv() const throw()
9480fc04c29Smrg { return false; }
9490fc04c29Smrg
9500fc04c29Smrg int
9510fc04c29Smrg codecvt<char16_t, char8_t, mbstate_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const9520fc04c29Smrg do_length(state_type&, const extern_type* __from,
9530fc04c29Smrg const extern_type* __end, size_t __max) const
9540fc04c29Smrg {
9550fc04c29Smrg __end = utf16_span(__from, __end, __max);
9560fc04c29Smrg return __end - __from;
9570fc04c29Smrg }
9580fc04c29Smrg
9590fc04c29Smrg int
do_max_length() const9600fc04c29Smrg codecvt<char16_t, char8_t, mbstate_t>::do_max_length() const throw()
9610fc04c29Smrg {
9620fc04c29Smrg // A single character (one or two UTF-16 code units) requires
9630fc04c29Smrg // up to four UTF-8 code units.
9640fc04c29Smrg return 4;
9650fc04c29Smrg }
9660fc04c29Smrg
9670fc04c29Smrg // Define members of codecvt<char32_t, char8_t, mbstate_t> specialization.
9680fc04c29Smrg // Converts from UTF-8 to UTF-32 (aka UCS-4).
9690fc04c29Smrg
9700fc04c29Smrg locale::id codecvt<char32_t, char8_t, mbstate_t>::id;
9710fc04c29Smrg
~codecvt()9720fc04c29Smrg codecvt<char32_t, char8_t, mbstate_t>::~codecvt() { }
9730fc04c29Smrg
9740fc04c29Smrg codecvt_base::result
9750fc04c29Smrg codecvt<char32_t, char8_t, mbstate_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const9760fc04c29Smrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
9770fc04c29Smrg const intern_type*& __from_next,
9780fc04c29Smrg extern_type* __to, extern_type* __to_end,
9790fc04c29Smrg extern_type*& __to_next) const
9800fc04c29Smrg {
9810fc04c29Smrg range<const char32_t> from{ __from, __from_end };
9820fc04c29Smrg range<char8_t> to{ __to, __to_end };
9830fc04c29Smrg auto res = ucs4_out(from, to);
9840fc04c29Smrg __from_next = from.next;
9850fc04c29Smrg __to_next = to.next;
9860fc04c29Smrg return res;
9870fc04c29Smrg }
9880fc04c29Smrg
9890fc04c29Smrg codecvt_base::result
9900fc04c29Smrg codecvt<char32_t, char8_t, mbstate_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const9910fc04c29Smrg do_unshift(state_type&, extern_type* __to, extern_type*,
9920fc04c29Smrg extern_type*& __to_next) const
9930fc04c29Smrg {
9940fc04c29Smrg __to_next = __to;
9950fc04c29Smrg return noconv;
9960fc04c29Smrg }
9970fc04c29Smrg
9980fc04c29Smrg codecvt_base::result
9990fc04c29Smrg codecvt<char32_t, char8_t, mbstate_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const10000fc04c29Smrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
10010fc04c29Smrg const extern_type*& __from_next,
10020fc04c29Smrg intern_type* __to, intern_type* __to_end,
10030fc04c29Smrg intern_type*& __to_next) const
10040fc04c29Smrg {
10050fc04c29Smrg range<const char8_t> from{ __from, __from_end };
10060fc04c29Smrg range<char32_t> to{ __to, __to_end };
10070fc04c29Smrg auto res = ucs4_in(from, to);
10080fc04c29Smrg __from_next = from.next;
10090fc04c29Smrg __to_next = to.next;
10100fc04c29Smrg return res;
10110fc04c29Smrg }
10120fc04c29Smrg
10130fc04c29Smrg int
do_encoding() const10140fc04c29Smrg codecvt<char32_t, char8_t, mbstate_t>::do_encoding() const throw()
10150fc04c29Smrg { return 0; } // UTF-8 is not a fixed-width encoding
10160fc04c29Smrg
10170fc04c29Smrg bool
do_always_noconv() const10180fc04c29Smrg codecvt<char32_t, char8_t, mbstate_t>::do_always_noconv() const throw()
10190fc04c29Smrg { return false; }
10200fc04c29Smrg
10210fc04c29Smrg int
10220fc04c29Smrg codecvt<char32_t, char8_t, mbstate_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const10230fc04c29Smrg do_length(state_type&, const extern_type* __from,
10240fc04c29Smrg const extern_type* __end, size_t __max) const
10250fc04c29Smrg {
10260fc04c29Smrg __end = ucs4_span(__from, __end, __max);
10270fc04c29Smrg return __end - __from;
10280fc04c29Smrg }
10290fc04c29Smrg
10300fc04c29Smrg int
do_max_length() const10310fc04c29Smrg codecvt<char32_t, char8_t, mbstate_t>::do_max_length() const throw()
10320fc04c29Smrg {
10330fc04c29Smrg // A single character (one UTF-32 code unit) requires
10340fc04c29Smrg // up to 4 UTF-8 code units.
10350fc04c29Smrg return 4;
10360fc04c29Smrg }
10370fc04c29Smrg #endif // _GLIBCXX_USE_CHAR8_T
10380fc04c29Smrg
103910d565efSmrg // Define members of codecvt_utf8<char16_t> base class implementation.
104010d565efSmrg // Converts from UTF-8 to UCS-2.
104110d565efSmrg
~__codecvt_utf8_base()104210d565efSmrg __codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
104310d565efSmrg
104410d565efSmrg codecvt_base::result
104510d565efSmrg __codecvt_utf8_base<char16_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const104610d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
104710d565efSmrg const intern_type*& __from_next,
104810d565efSmrg extern_type* __to, extern_type* __to_end,
104910d565efSmrg extern_type*& __to_next) const
105010d565efSmrg {
105110d565efSmrg range<const char16_t> from{ __from, __from_end };
105210d565efSmrg range<char> to{ __to, __to_end };
105310d565efSmrg auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
105410d565efSmrg __from_next = from.next;
105510d565efSmrg __to_next = to.next;
105610d565efSmrg return res;
105710d565efSmrg }
105810d565efSmrg
105910d565efSmrg codecvt_base::result
106010d565efSmrg __codecvt_utf8_base<char16_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const106110d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
106210d565efSmrg extern_type*& __to_next) const
106310d565efSmrg {
106410d565efSmrg __to_next = __to;
106510d565efSmrg return noconv;
106610d565efSmrg }
106710d565efSmrg
106810d565efSmrg codecvt_base::result
106910d565efSmrg __codecvt_utf8_base<char16_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const107010d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
107110d565efSmrg const extern_type*& __from_next,
107210d565efSmrg intern_type* __to, intern_type* __to_end,
107310d565efSmrg intern_type*& __to_next) const
107410d565efSmrg {
107510d565efSmrg range<const char> from{ __from, __from_end };
107610d565efSmrg range<char16_t> to{ __to, __to_end };
107710d565efSmrg codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
107810d565efSmrg #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
107910d565efSmrg mode = codecvt_mode(mode | little_endian);
108010d565efSmrg #endif
108110d565efSmrg auto res = ucs2_in(from, to, _M_maxcode, mode);
108210d565efSmrg __from_next = from.next;
108310d565efSmrg __to_next = to.next;
108410d565efSmrg return res;
108510d565efSmrg }
108610d565efSmrg
108710d565efSmrg int
do_encoding() const108810d565efSmrg __codecvt_utf8_base<char16_t>::do_encoding() const throw()
108910d565efSmrg { return 0; } // UTF-8 is not a fixed-width encoding
109010d565efSmrg
109110d565efSmrg bool
do_always_noconv() const109210d565efSmrg __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
109310d565efSmrg { return false; }
109410d565efSmrg
109510d565efSmrg int
109610d565efSmrg __codecvt_utf8_base<char16_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const109710d565efSmrg do_length(state_type&, const extern_type* __from,
109810d565efSmrg const extern_type* __end, size_t __max) const
109910d565efSmrg {
110010d565efSmrg __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
110110d565efSmrg return __end - __from;
110210d565efSmrg }
110310d565efSmrg
110410d565efSmrg int
do_max_length() const110510d565efSmrg __codecvt_utf8_base<char16_t>::do_max_length() const throw()
110610d565efSmrg {
110710d565efSmrg // A single UCS-2 character requires up to three UTF-8 code units.
110810d565efSmrg // (UCS-2 cannot represent characters that use four UTF-8 code units).
110910d565efSmrg int max = 3;
111010d565efSmrg if (_M_mode & consume_header)
111110d565efSmrg max += sizeof(utf8_bom);
111210d565efSmrg return max;
111310d565efSmrg }
111410d565efSmrg
111510d565efSmrg // Define members of codecvt_utf8<char32_t> base class implementation.
111610d565efSmrg // Converts from UTF-8 to UTF-32 (aka UCS-4).
111710d565efSmrg
~__codecvt_utf8_base()111810d565efSmrg __codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
111910d565efSmrg
112010d565efSmrg codecvt_base::result
112110d565efSmrg __codecvt_utf8_base<char32_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const112210d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
112310d565efSmrg const intern_type*& __from_next,
112410d565efSmrg extern_type* __to, extern_type* __to_end,
112510d565efSmrg extern_type*& __to_next) const
112610d565efSmrg {
112710d565efSmrg range<const char32_t> from{ __from, __from_end };
112810d565efSmrg range<char> to{ __to, __to_end };
112910d565efSmrg auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
113010d565efSmrg __from_next = from.next;
113110d565efSmrg __to_next = to.next;
113210d565efSmrg return res;
113310d565efSmrg }
113410d565efSmrg
113510d565efSmrg codecvt_base::result
113610d565efSmrg __codecvt_utf8_base<char32_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const113710d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
113810d565efSmrg extern_type*& __to_next) const
113910d565efSmrg {
114010d565efSmrg __to_next = __to;
114110d565efSmrg return noconv;
114210d565efSmrg }
114310d565efSmrg
114410d565efSmrg codecvt_base::result
114510d565efSmrg __codecvt_utf8_base<char32_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const114610d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
114710d565efSmrg const extern_type*& __from_next,
114810d565efSmrg intern_type* __to, intern_type* __to_end,
114910d565efSmrg intern_type*& __to_next) const
115010d565efSmrg {
115110d565efSmrg range<const char> from{ __from, __from_end };
115210d565efSmrg range<char32_t> to{ __to, __to_end };
115310d565efSmrg auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
115410d565efSmrg __from_next = from.next;
115510d565efSmrg __to_next = to.next;
115610d565efSmrg return res;
115710d565efSmrg }
115810d565efSmrg
115910d565efSmrg int
do_encoding() const116010d565efSmrg __codecvt_utf8_base<char32_t>::do_encoding() const throw()
116110d565efSmrg { return 0; } // UTF-8 is not a fixed-width encoding
116210d565efSmrg
116310d565efSmrg bool
do_always_noconv() const116410d565efSmrg __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
116510d565efSmrg { return false; }
116610d565efSmrg
116710d565efSmrg int
116810d565efSmrg __codecvt_utf8_base<char32_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const116910d565efSmrg do_length(state_type&, const extern_type* __from,
117010d565efSmrg const extern_type* __end, size_t __max) const
117110d565efSmrg {
117210d565efSmrg __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
117310d565efSmrg return __end - __from;
117410d565efSmrg }
117510d565efSmrg
117610d565efSmrg int
do_max_length() const117710d565efSmrg __codecvt_utf8_base<char32_t>::do_max_length() const throw()
117810d565efSmrg {
117910d565efSmrg // A single UCS-4 character requires up to four UTF-8 code units.
118010d565efSmrg int max = 4;
118110d565efSmrg if (_M_mode & consume_header)
118210d565efSmrg max += sizeof(utf8_bom);
118310d565efSmrg return max;
118410d565efSmrg }
118510d565efSmrg
118610d565efSmrg #ifdef _GLIBCXX_USE_WCHAR_T
118710d565efSmrg
118810d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
118910d565efSmrg static_assert(sizeof(wchar_t) == sizeof(char16_t), "");
119010d565efSmrg #elif __SIZEOF_WCHAR_T__ == 4
119110d565efSmrg static_assert(sizeof(wchar_t) == sizeof(char32_t), "");
119210d565efSmrg #endif
119310d565efSmrg
119410d565efSmrg // Define members of codecvt_utf8<wchar_t> base class implementation.
119510d565efSmrg // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
119610d565efSmrg
~__codecvt_utf8_base()119710d565efSmrg __codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
119810d565efSmrg
119910d565efSmrg codecvt_base::result
120010d565efSmrg __codecvt_utf8_base<wchar_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const120110d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
120210d565efSmrg const intern_type*& __from_next,
120310d565efSmrg extern_type* __to, extern_type* __to_end,
120410d565efSmrg extern_type*& __to_next) const
120510d565efSmrg {
120610d565efSmrg range<char> to{ __to, __to_end };
120710d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
120810d565efSmrg range<const char16_t> from{
120910d565efSmrg reinterpret_cast<const char16_t*>(__from),
121010d565efSmrg reinterpret_cast<const char16_t*>(__from_end)
121110d565efSmrg };
121210d565efSmrg auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
121310d565efSmrg #elif __SIZEOF_WCHAR_T__ == 4
121410d565efSmrg range<const char32_t> from{
121510d565efSmrg reinterpret_cast<const char32_t*>(__from),
121610d565efSmrg reinterpret_cast<const char32_t*>(__from_end)
121710d565efSmrg };
121810d565efSmrg auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
121910d565efSmrg #else
122010d565efSmrg return codecvt_base::error;
122110d565efSmrg #endif
122210d565efSmrg __from_next = reinterpret_cast<const wchar_t*>(from.next);
122310d565efSmrg __to_next = to.next;
122410d565efSmrg return res;
122510d565efSmrg }
122610d565efSmrg
122710d565efSmrg codecvt_base::result
122810d565efSmrg __codecvt_utf8_base<wchar_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const122910d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
123010d565efSmrg extern_type*& __to_next) const
123110d565efSmrg {
123210d565efSmrg __to_next = __to;
123310d565efSmrg return noconv;
123410d565efSmrg }
123510d565efSmrg
123610d565efSmrg codecvt_base::result
123710d565efSmrg __codecvt_utf8_base<wchar_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const123810d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
123910d565efSmrg const extern_type*& __from_next,
124010d565efSmrg intern_type* __to, intern_type* __to_end,
124110d565efSmrg intern_type*& __to_next) const
124210d565efSmrg {
124310d565efSmrg range<const char> from{ __from, __from_end };
124410d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
124510d565efSmrg range<char16_t> to{
124610d565efSmrg reinterpret_cast<char16_t*>(__to),
124710d565efSmrg reinterpret_cast<char16_t*>(__to_end)
124810d565efSmrg };
124910d565efSmrg #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
125010d565efSmrg codecvt_mode mode = {};
125110d565efSmrg #else
125210d565efSmrg codecvt_mode mode = little_endian;
125310d565efSmrg #endif
125410d565efSmrg auto res = ucs2_in(from, to, _M_maxcode, mode);
125510d565efSmrg #elif __SIZEOF_WCHAR_T__ == 4
125610d565efSmrg range<char32_t> to{
125710d565efSmrg reinterpret_cast<char32_t*>(__to),
125810d565efSmrg reinterpret_cast<char32_t*>(__to_end)
125910d565efSmrg };
126010d565efSmrg auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
126110d565efSmrg #else
126210d565efSmrg return codecvt_base::error;
126310d565efSmrg #endif
126410d565efSmrg __from_next = from.next;
126510d565efSmrg __to_next = reinterpret_cast<wchar_t*>(to.next);
126610d565efSmrg return res;
126710d565efSmrg }
126810d565efSmrg
126910d565efSmrg int
do_encoding() const127010d565efSmrg __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
127110d565efSmrg { return 0; } // UTF-8 is not a fixed-width encoding
127210d565efSmrg
127310d565efSmrg bool
do_always_noconv() const127410d565efSmrg __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
127510d565efSmrg { return false; }
127610d565efSmrg
127710d565efSmrg int
127810d565efSmrg __codecvt_utf8_base<wchar_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const127910d565efSmrg do_length(state_type&, const extern_type* __from,
128010d565efSmrg const extern_type* __end, size_t __max) const
128110d565efSmrg {
128210d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
128310d565efSmrg __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
128410d565efSmrg #elif __SIZEOF_WCHAR_T__ == 4
128510d565efSmrg __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
128610d565efSmrg #else
128710d565efSmrg __end = __from;
128810d565efSmrg #endif
128910d565efSmrg return __end - __from;
129010d565efSmrg }
129110d565efSmrg
129210d565efSmrg int
do_max_length() const129310d565efSmrg __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
129410d565efSmrg {
129510d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
129610d565efSmrg int max = 3; // See __codecvt_utf8_base<char16_t>::do_max_length()
129710d565efSmrg #else
129810d565efSmrg int max = 4; // See __codecvt_utf8_base<char32_t>::do_max_length()
129910d565efSmrg #endif
130010d565efSmrg if (_M_mode & consume_header)
130110d565efSmrg max += sizeof(utf8_bom);
130210d565efSmrg return max;
130310d565efSmrg }
130410d565efSmrg #endif
130510d565efSmrg
130610d565efSmrg // Define members of codecvt_utf16<char16_t> base class implementation.
130710d565efSmrg // Converts from UTF-16 to UCS-2.
130810d565efSmrg
~__codecvt_utf16_base()130910d565efSmrg __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
131010d565efSmrg
131110d565efSmrg codecvt_base::result
131210d565efSmrg __codecvt_utf16_base<char16_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const131310d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
131410d565efSmrg const intern_type*& __from_next,
131510d565efSmrg extern_type* __to, extern_type* __to_end,
131610d565efSmrg extern_type*& __to_next) const
131710d565efSmrg {
131810d565efSmrg range<const char16_t> from{ __from, __from_end };
131910d565efSmrg range<char16_t, false> to{ __to, __to_end };
132010d565efSmrg auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
132110d565efSmrg __from_next = from.next;
132210d565efSmrg __to_next = reinterpret_cast<char*>(to.next);
132310d565efSmrg return res;
132410d565efSmrg }
132510d565efSmrg
132610d565efSmrg codecvt_base::result
132710d565efSmrg __codecvt_utf16_base<char16_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const132810d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
132910d565efSmrg extern_type*& __to_next) const
133010d565efSmrg {
133110d565efSmrg __to_next = __to;
133210d565efSmrg return noconv;
133310d565efSmrg }
133410d565efSmrg
133510d565efSmrg codecvt_base::result
133610d565efSmrg __codecvt_utf16_base<char16_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const133710d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
133810d565efSmrg const extern_type*& __from_next,
133910d565efSmrg intern_type* __to, intern_type* __to_end,
134010d565efSmrg intern_type*& __to_next) const
134110d565efSmrg {
134210d565efSmrg range<const char16_t, false> from{ __from, __from_end };
134310d565efSmrg range<char16_t> to{ __to, __to_end };
134410d565efSmrg auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
134510d565efSmrg __from_next = reinterpret_cast<const char*>(from.next);
134610d565efSmrg __to_next = to.next;
134710d565efSmrg if (res == codecvt_base::ok && __from_next != __from_end)
134810d565efSmrg res = codecvt_base::error;
134910d565efSmrg return res;
135010d565efSmrg }
135110d565efSmrg
135210d565efSmrg int
do_encoding() const135310d565efSmrg __codecvt_utf16_base<char16_t>::do_encoding() const throw()
135410d565efSmrg { return 0; } // UTF-16 is not a fixed-width encoding
135510d565efSmrg
135610d565efSmrg bool
do_always_noconv() const135710d565efSmrg __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
135810d565efSmrg { return false; }
135910d565efSmrg
136010d565efSmrg int
136110d565efSmrg __codecvt_utf16_base<char16_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const136210d565efSmrg do_length(state_type&, const extern_type* __from,
136310d565efSmrg const extern_type* __end, size_t __max) const
136410d565efSmrg {
136510d565efSmrg range<const char16_t, false> from{ __from, __end };
136610d565efSmrg const char16_t* next = ucs2_span(from, __max, _M_maxcode, _M_mode);
136710d565efSmrg return reinterpret_cast<const char*>(next) - __from;
136810d565efSmrg }
136910d565efSmrg
137010d565efSmrg int
do_max_length() const137110d565efSmrg __codecvt_utf16_base<char16_t>::do_max_length() const throw()
137210d565efSmrg {
137310d565efSmrg // A single UCS-2 character requires one UTF-16 code unit (so two chars).
137410d565efSmrg // (UCS-2 cannot represent characters that use multiple UTF-16 code units).
137510d565efSmrg int max = 2;
137610d565efSmrg if (_M_mode & consume_header)
137710d565efSmrg max += sizeof(utf16_bom);
137810d565efSmrg return max;
137910d565efSmrg }
138010d565efSmrg
138110d565efSmrg // Define members of codecvt_utf16<char32_t> base class implementation.
138210d565efSmrg // Converts from UTF-16 to UTF-32 (aka UCS-4).
138310d565efSmrg
~__codecvt_utf16_base()138410d565efSmrg __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
138510d565efSmrg
138610d565efSmrg codecvt_base::result
138710d565efSmrg __codecvt_utf16_base<char32_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const138810d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
138910d565efSmrg const intern_type*& __from_next,
139010d565efSmrg extern_type* __to, extern_type* __to_end,
139110d565efSmrg extern_type*& __to_next) const
139210d565efSmrg {
139310d565efSmrg range<const char32_t> from{ __from, __from_end };
139410d565efSmrg range<char16_t, false> to{ __to, __to_end };
139510d565efSmrg auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
139610d565efSmrg __from_next = from.next;
139710d565efSmrg __to_next = reinterpret_cast<char*>(to.next);
139810d565efSmrg return res;
139910d565efSmrg }
140010d565efSmrg
140110d565efSmrg codecvt_base::result
140210d565efSmrg __codecvt_utf16_base<char32_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const140310d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
140410d565efSmrg extern_type*& __to_next) const
140510d565efSmrg {
140610d565efSmrg __to_next = __to;
140710d565efSmrg return noconv;
140810d565efSmrg }
140910d565efSmrg
141010d565efSmrg codecvt_base::result
141110d565efSmrg __codecvt_utf16_base<char32_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const141210d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
141310d565efSmrg const extern_type*& __from_next,
141410d565efSmrg intern_type* __to, intern_type* __to_end,
141510d565efSmrg intern_type*& __to_next) const
141610d565efSmrg {
141710d565efSmrg range<const char16_t, false> from{ __from, __from_end };
141810d565efSmrg range<char32_t> to{ __to, __to_end };
141910d565efSmrg auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
142010d565efSmrg __from_next = reinterpret_cast<const char*>(from.next);
142110d565efSmrg __to_next = to.next;
142210d565efSmrg if (res == codecvt_base::ok && __from_next != __from_end)
142310d565efSmrg res = codecvt_base::error;
142410d565efSmrg return res;
142510d565efSmrg }
142610d565efSmrg
142710d565efSmrg int
do_encoding() const142810d565efSmrg __codecvt_utf16_base<char32_t>::do_encoding() const throw()
142910d565efSmrg { return 0; } // UTF-16 is not a fixed-width encoding
143010d565efSmrg
143110d565efSmrg bool
do_always_noconv() const143210d565efSmrg __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
143310d565efSmrg { return false; }
143410d565efSmrg
143510d565efSmrg int
143610d565efSmrg __codecvt_utf16_base<char32_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const143710d565efSmrg do_length(state_type&, const extern_type* __from,
143810d565efSmrg const extern_type* __end, size_t __max) const
143910d565efSmrg {
144010d565efSmrg range<const char16_t, false> from{ __from, __end };
144110d565efSmrg const char16_t* next = ucs4_span(from, __max, _M_maxcode, _M_mode);
144210d565efSmrg return reinterpret_cast<const char*>(next) - __from;
144310d565efSmrg }
144410d565efSmrg
144510d565efSmrg int
do_max_length() const144610d565efSmrg __codecvt_utf16_base<char32_t>::do_max_length() const throw()
144710d565efSmrg {
144810d565efSmrg // A single UCS-4 character requires one or two UTF-16 code units
144910d565efSmrg // (so up to four chars).
145010d565efSmrg int max = 4;
145110d565efSmrg if (_M_mode & consume_header)
145210d565efSmrg max += sizeof(utf16_bom);
145310d565efSmrg return max;
145410d565efSmrg }
145510d565efSmrg
145610d565efSmrg #ifdef _GLIBCXX_USE_WCHAR_T
145710d565efSmrg // Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
145910d565efSmrg
~__codecvt_utf16_base()146010d565efSmrg __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
146110d565efSmrg
146210d565efSmrg codecvt_base::result
146310d565efSmrg __codecvt_utf16_base<wchar_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const146410d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
146510d565efSmrg const intern_type*& __from_next,
146610d565efSmrg extern_type* __to, extern_type* __to_end,
146710d565efSmrg extern_type*& __to_next) const
146810d565efSmrg {
146910d565efSmrg range<char16_t, false> to{ __to, __to_end };
147010d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
147110d565efSmrg range<const char16_t> from{
147210d565efSmrg reinterpret_cast<const char16_t*>(__from),
147310d565efSmrg reinterpret_cast<const char16_t*>(__from_end),
147410d565efSmrg };
147510d565efSmrg auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
147610d565efSmrg #elif __SIZEOF_WCHAR_T__ == 4
147710d565efSmrg range<const char32_t> from{
147810d565efSmrg reinterpret_cast<const char32_t*>(__from),
147910d565efSmrg reinterpret_cast<const char32_t*>(__from_end),
148010d565efSmrg };
148110d565efSmrg auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
148210d565efSmrg #else
148310d565efSmrg return codecvt_base::error;
148410d565efSmrg #endif
148510d565efSmrg __from_next = reinterpret_cast<const wchar_t*>(from.next);
148610d565efSmrg __to_next = reinterpret_cast<char*>(to.next);
148710d565efSmrg return res;
148810d565efSmrg }
148910d565efSmrg
149010d565efSmrg codecvt_base::result
149110d565efSmrg __codecvt_utf16_base<wchar_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const149210d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
149310d565efSmrg extern_type*& __to_next) const
149410d565efSmrg {
149510d565efSmrg __to_next = __to;
149610d565efSmrg return noconv;
149710d565efSmrg }
149810d565efSmrg
149910d565efSmrg codecvt_base::result
150010d565efSmrg __codecvt_utf16_base<wchar_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const150110d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
150210d565efSmrg const extern_type*& __from_next,
150310d565efSmrg intern_type* __to, intern_type* __to_end,
150410d565efSmrg intern_type*& __to_next) const
150510d565efSmrg {
150610d565efSmrg range<const char16_t, false> from{ __from, __from_end };
150710d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
150810d565efSmrg range<char16_t> to{
150910d565efSmrg reinterpret_cast<char16_t*>(__to),
151010d565efSmrg reinterpret_cast<char16_t*>(__to_end),
151110d565efSmrg };
151210d565efSmrg auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
151310d565efSmrg #elif __SIZEOF_WCHAR_T__ == 4
151410d565efSmrg range<char32_t> to{
151510d565efSmrg reinterpret_cast<char32_t*>(__to),
151610d565efSmrg reinterpret_cast<char32_t*>(__to_end),
151710d565efSmrg };
151810d565efSmrg auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
151910d565efSmrg #else
152010d565efSmrg return codecvt_base::error;
152110d565efSmrg #endif
152210d565efSmrg __from_next = reinterpret_cast<const char*>(from.next);
152310d565efSmrg __to_next = reinterpret_cast<wchar_t*>(to.next);
152410d565efSmrg if (res == codecvt_base::ok && __from_next != __from_end)
152510d565efSmrg res = codecvt_base::error;
152610d565efSmrg return res;
152710d565efSmrg }
152810d565efSmrg
152910d565efSmrg int
do_encoding() const153010d565efSmrg __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
153110d565efSmrg { return 0; } // UTF-16 is not a fixed-width encoding
153210d565efSmrg
153310d565efSmrg bool
do_always_noconv() const153410d565efSmrg __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
153510d565efSmrg { return false; }
153610d565efSmrg
153710d565efSmrg int
153810d565efSmrg __codecvt_utf16_base<wchar_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const153910d565efSmrg do_length(state_type&, const extern_type* __from,
154010d565efSmrg const extern_type* __end, size_t __max) const
154110d565efSmrg {
154210d565efSmrg range<const char16_t, false> from{ __from, __end };
154310d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
154410d565efSmrg const char16_t* next = ucs2_span(from, __max, _M_maxcode, _M_mode);
154510d565efSmrg #elif __SIZEOF_WCHAR_T__ == 4
154610d565efSmrg const char16_t* next = ucs4_span(from, __max, _M_maxcode, _M_mode);
154710d565efSmrg #endif
154810d565efSmrg return reinterpret_cast<const char*>(next) - __from;
154910d565efSmrg }
155010d565efSmrg
155110d565efSmrg int
do_max_length() const155210d565efSmrg __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
155310d565efSmrg {
155410d565efSmrg #if __SIZEOF_WCHAR_T__ == 2
155510d565efSmrg int max = 2; // See __codecvt_utf16_base<char16_t>::do_max_length()
155610d565efSmrg #else
155710d565efSmrg int max = 4; // See __codecvt_utf16_base<char32_t>::do_max_length()
155810d565efSmrg #endif
155910d565efSmrg if (_M_mode & consume_header)
156010d565efSmrg max += sizeof(utf16_bom);
156110d565efSmrg return max;
156210d565efSmrg }
156310d565efSmrg #endif
156410d565efSmrg
156510d565efSmrg // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
156610d565efSmrg // Converts from UTF-8 to UTF-16.
156710d565efSmrg
~__codecvt_utf8_utf16_base()156810d565efSmrg __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
156910d565efSmrg
157010d565efSmrg codecvt_base::result
157110d565efSmrg __codecvt_utf8_utf16_base<char16_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const157210d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
157310d565efSmrg const intern_type*& __from_next,
157410d565efSmrg extern_type* __to, extern_type* __to_end,
157510d565efSmrg extern_type*& __to_next) const
157610d565efSmrg {
157710d565efSmrg range<const char16_t> from{ __from, __from_end };
157810d565efSmrg range<char> to{ __to, __to_end };
157910d565efSmrg auto res = utf16_out(from, to, _M_maxcode, _M_mode);
158010d565efSmrg __from_next = from.next;
158110d565efSmrg __to_next = to.next;
158210d565efSmrg return res;
158310d565efSmrg }
158410d565efSmrg
158510d565efSmrg codecvt_base::result
158610d565efSmrg __codecvt_utf8_utf16_base<char16_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const158710d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
158810d565efSmrg extern_type*& __to_next) const
158910d565efSmrg {
159010d565efSmrg __to_next = __to;
159110d565efSmrg return noconv;
159210d565efSmrg }
159310d565efSmrg
159410d565efSmrg codecvt_base::result
159510d565efSmrg __codecvt_utf8_utf16_base<char16_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const159610d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
159710d565efSmrg const extern_type*& __from_next,
159810d565efSmrg intern_type* __to, intern_type* __to_end,
159910d565efSmrg intern_type*& __to_next) const
160010d565efSmrg {
160110d565efSmrg range<const char> from{ __from, __from_end };
160210d565efSmrg range<char16_t> to{ __to, __to_end };
160310d565efSmrg codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
160410d565efSmrg #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
160510d565efSmrg mode = codecvt_mode(mode | little_endian);
160610d565efSmrg #endif
160710d565efSmrg auto res = utf16_in(from, to, _M_maxcode, mode);
160810d565efSmrg __from_next = from.next;
160910d565efSmrg __to_next = to.next;
161010d565efSmrg return res;
161110d565efSmrg }
161210d565efSmrg
161310d565efSmrg int
do_encoding() const161410d565efSmrg __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
161510d565efSmrg { return 0; } // UTF-8 is not a fixed-width encoding
161610d565efSmrg
161710d565efSmrg bool
do_always_noconv() const161810d565efSmrg __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
161910d565efSmrg { return false; }
162010d565efSmrg
162110d565efSmrg int
162210d565efSmrg __codecvt_utf8_utf16_base<char16_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const162310d565efSmrg do_length(state_type&, const extern_type* __from,
162410d565efSmrg const extern_type* __end, size_t __max) const
162510d565efSmrg {
162610d565efSmrg __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
162710d565efSmrg return __end - __from;
162810d565efSmrg }
162910d565efSmrg
163010d565efSmrg int
do_max_length() const163110d565efSmrg __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
163210d565efSmrg {
163310d565efSmrg // A single character can be 1 or 2 UTF-16 code units,
163410d565efSmrg // requiring up to 4 UTF-8 code units.
163510d565efSmrg int max = 4;
163610d565efSmrg if (_M_mode & consume_header)
163710d565efSmrg max += sizeof(utf8_bom);
163810d565efSmrg return max;
163910d565efSmrg }
164010d565efSmrg
164110d565efSmrg // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
164210d565efSmrg // Converts from UTF-8 to UTF-16.
164310d565efSmrg
~__codecvt_utf8_utf16_base()164410d565efSmrg __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
164510d565efSmrg
164610d565efSmrg codecvt_base::result
164710d565efSmrg __codecvt_utf8_utf16_base<char32_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const164810d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
164910d565efSmrg const intern_type*& __from_next,
165010d565efSmrg extern_type* __to, extern_type* __to_end,
165110d565efSmrg extern_type*& __to_next) const
165210d565efSmrg {
165310d565efSmrg range<const char32_t> from{ __from, __from_end };
165410d565efSmrg range<char> to{ __to, __to_end };
165510d565efSmrg auto res = utf16_out(from, to, _M_maxcode, _M_mode);
165610d565efSmrg __from_next = from.next;
165710d565efSmrg __to_next = to.next;
165810d565efSmrg return res;
165910d565efSmrg }
166010d565efSmrg
166110d565efSmrg codecvt_base::result
166210d565efSmrg __codecvt_utf8_utf16_base<char32_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const166310d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
166410d565efSmrg extern_type*& __to_next) const
166510d565efSmrg {
166610d565efSmrg __to_next = __to;
166710d565efSmrg return noconv;
166810d565efSmrg }
166910d565efSmrg
167010d565efSmrg codecvt_base::result
167110d565efSmrg __codecvt_utf8_utf16_base<char32_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const167210d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
167310d565efSmrg const extern_type*& __from_next,
167410d565efSmrg intern_type* __to, intern_type* __to_end,
167510d565efSmrg intern_type*& __to_next) const
167610d565efSmrg {
167710d565efSmrg range<const char> from{ __from, __from_end };
167810d565efSmrg range<char32_t> to{ __to, __to_end };
167910d565efSmrg codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
168010d565efSmrg #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
168110d565efSmrg mode = codecvt_mode(mode | little_endian);
168210d565efSmrg #endif
168310d565efSmrg auto res = utf16_in(from, to, _M_maxcode, mode);
168410d565efSmrg __from_next = from.next;
168510d565efSmrg __to_next = to.next;
168610d565efSmrg return res;
168710d565efSmrg }
168810d565efSmrg
168910d565efSmrg int
do_encoding() const169010d565efSmrg __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
169110d565efSmrg { return 0; } // UTF-8 is not a fixed-width encoding
169210d565efSmrg
169310d565efSmrg bool
do_always_noconv() const169410d565efSmrg __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
169510d565efSmrg { return false; }
169610d565efSmrg
169710d565efSmrg int
169810d565efSmrg __codecvt_utf8_utf16_base<char32_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const169910d565efSmrg do_length(state_type&, const extern_type* __from,
170010d565efSmrg const extern_type* __end, size_t __max) const
170110d565efSmrg {
170210d565efSmrg __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
170310d565efSmrg return __end - __from;
170410d565efSmrg }
170510d565efSmrg
170610d565efSmrg int
do_max_length() const170710d565efSmrg __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
170810d565efSmrg {
170910d565efSmrg // A single character can be 1 or 2 UTF-16 code units,
171010d565efSmrg // requiring up to 4 UTF-8 code units.
171110d565efSmrg int max = 4;
171210d565efSmrg if (_M_mode & consume_header)
171310d565efSmrg max += sizeof(utf8_bom);
171410d565efSmrg return max;
171510d565efSmrg }
171610d565efSmrg
171710d565efSmrg #ifdef _GLIBCXX_USE_WCHAR_T
171810d565efSmrg // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
171910d565efSmrg // Converts from UTF-8 to UTF-16.
172010d565efSmrg
~__codecvt_utf8_utf16_base()172110d565efSmrg __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
172210d565efSmrg
172310d565efSmrg codecvt_base::result
172410d565efSmrg __codecvt_utf8_utf16_base<wchar_t>::
do_out(state_type &,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const172510d565efSmrg do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
172610d565efSmrg const intern_type*& __from_next,
172710d565efSmrg extern_type* __to, extern_type* __to_end,
172810d565efSmrg extern_type*& __to_next) const
172910d565efSmrg {
173010d565efSmrg range<const wchar_t> from{ __from, __from_end };
173110d565efSmrg range<char> to{ __to, __to_end };
173210d565efSmrg auto res = utf16_out(from, to, _M_maxcode, _M_mode);
173310d565efSmrg __from_next = from.next;
173410d565efSmrg __to_next = to.next;
173510d565efSmrg return res;
173610d565efSmrg }
173710d565efSmrg
173810d565efSmrg codecvt_base::result
173910d565efSmrg __codecvt_utf8_utf16_base<wchar_t>::
do_unshift(state_type &,extern_type * __to,extern_type *,extern_type * & __to_next) const174010d565efSmrg do_unshift(state_type&, extern_type* __to, extern_type*,
174110d565efSmrg extern_type*& __to_next) const
174210d565efSmrg {
174310d565efSmrg __to_next = __to;
174410d565efSmrg return noconv;
174510d565efSmrg }
174610d565efSmrg
174710d565efSmrg codecvt_base::result
174810d565efSmrg __codecvt_utf8_utf16_base<wchar_t>::
do_in(state_type &,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const174910d565efSmrg do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
175010d565efSmrg const extern_type*& __from_next,
175110d565efSmrg intern_type* __to, intern_type* __to_end,
175210d565efSmrg intern_type*& __to_next) const
175310d565efSmrg {
175410d565efSmrg range<const char> from{ __from, __from_end };
175510d565efSmrg range<wchar_t> to{ __to, __to_end };
175610d565efSmrg codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
175710d565efSmrg #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
175810d565efSmrg mode = codecvt_mode(mode | little_endian);
175910d565efSmrg #endif
176010d565efSmrg auto res = utf16_in(from, to, _M_maxcode, mode);
176110d565efSmrg __from_next = from.next;
176210d565efSmrg __to_next = to.next;
176310d565efSmrg return res;
176410d565efSmrg }
176510d565efSmrg
176610d565efSmrg int
do_encoding() const176710d565efSmrg __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
176810d565efSmrg { return 0; } // UTF-8 is not a fixed-width encoding
176910d565efSmrg
177010d565efSmrg bool
do_always_noconv() const177110d565efSmrg __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
177210d565efSmrg { return false; }
177310d565efSmrg
177410d565efSmrg int
177510d565efSmrg __codecvt_utf8_utf16_base<wchar_t>::
do_length(state_type &,const extern_type * __from,const extern_type * __end,size_t __max) const177610d565efSmrg do_length(state_type&, const extern_type* __from,
177710d565efSmrg const extern_type* __end, size_t __max) const
177810d565efSmrg {
177910d565efSmrg __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
178010d565efSmrg return __end - __from;
178110d565efSmrg }
178210d565efSmrg
178310d565efSmrg int
do_max_length() const178410d565efSmrg __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
178510d565efSmrg {
178610d565efSmrg // A single character can be 1 or 2 UTF-16 code units,
178710d565efSmrg // requiring up to 4 UTF-8 code units.
178810d565efSmrg int max = 4;
178910d565efSmrg if (_M_mode & consume_header)
179010d565efSmrg max += sizeof(utf8_bom);
179110d565efSmrg return max;
179210d565efSmrg }
179310d565efSmrg #endif
179410d565efSmrg
179510d565efSmrg inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
179610d565efSmrg inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
179710d565efSmrg template class codecvt_byname<char16_t, char, mbstate_t>;
179810d565efSmrg template class codecvt_byname<char32_t, char, mbstate_t>;
179910d565efSmrg
#ifdef _GLIBCXX_USE_CHAR8_T
// The same explicit instantiations for an external type of char8_t; they
// are compiled only when _GLIBCXX_USE_CHAR8_T is defined.
// "inline template" is a GNU extension.  NOTE(review): per the GCC manual
// it instantiates only the compiler support data for the class (such as the
// vtable) without its members -- confirm before relying on this.

// char16_t <-> char8_t
inline template class __codecvt_abstract_base<char16_t, char8_t, mbstate_t>;
template class codecvt_byname<char16_t, char8_t, mbstate_t>;

// char32_t <-> char8_t
inline template class __codecvt_abstract_base<char32_t, char8_t, mbstate_t>;
template class codecvt_byname<char32_t, char8_t, mbstate_t>;
#endif
18060fc04c29Smrg
180710d565efSmrg _GLIBCXX_END_NAMESPACE_VERSION
180810d565efSmrg }
1809