1 #include <string>
2 #include <climits>
3 #include "char_ucs.h"
4 
5 /*
6 
7   copyright (c) 2006, 2015 squell <squell@alumina.nl>
8 
9   use, modification, copying and distribution of this software is permitted
10   under the conditions described in the file 'COPYING'.
11 
12 */
13 
14 namespace charset {
15     using namespace std;
16 
17     namespace {
18         union wide {
wide(wchar_t wc)19             wide(wchar_t wc) : code(wc) { }
20             wchar_t code;
21             char    raw[sizeof(wchar_t)];
22         };
23 
24         template<class T> inline
operator +=(std::basic_string<T> & str,const wide w)25         std::basic_string<T>& operator+=(std::basic_string<T>& str, const wide w)
26         {
27             return str += w.code;
28         }
29 
operator +=(std::string & str,const wide w)30         inline std::string& operator+=(std::string& str, const wide w)
31         {
32             return str.append(w.raw, sizeof w.raw);
33         }
34     }
35 
decode(const char * s,size_t len,byte_order ord)36     conv<>::data conv_wide::decode(const char* s, size_t len, byte_order ord)
37     {
38         if(!(len &= ~1U)) return conv<>::data();   // force len to 2k, k > 0
39         const char* end = s+len;
40 
41         conv<>::data build;
42         build.reserve(len / sizeof(wchar_t));
43         bool i = (ord == big_endian);
44 
45         switch(wide( s[0^i] & 0xFF | s[1^i]<<8 & 0xFF00U ).code) {
46         default: break;
47         case 0xFFFE: i = !i;
48         case 0xFEFF: s += 2;
49         }
50 
51         for( ; s < end; s+=2) {
52             wide ch( s[0^i] & 0xFF | s[1^i]<<8 & 0xFF00U );
53             if(ch.code < 0xD800 || ch.code >= 0xE000)
54                 build += ch;
55             else if(ch.code < 0xDC00 && (s+=2) < end) { // UTF-16 surrogate
56                 wide lo( s[0^i] & 0xFF | s[1^i]<<8 & 0xFF00U );
57                 if(lo.code >= 0xDC00 && lo.code < 0xE000)
58                     build += wide((ch.code&0x3FF)<<10 | (lo.code&0x3FF) | 0x10000);
59             }
60         }
61         return build;
62     }
63 
encode(const void * p,size_t len,byte_order ord)64     string conv_wide::encode(const void* p, size_t len, byte_order ord)
65     {
66         const wchar_t* w = (wchar_t*)p;
67         std::string build;
68         build.reserve(len);
69         int i = (ord == big_endian) * 8;
70 
71         if(ord == marked) {                    // write BOM
72             (build += '\xFF') += '\xFE';
73         }
74 
75         for( ; len--; ) {
76             wchar_t c = *w++;
77             if(c < 0x10000)                    // innocent warning by gcc
78                 (build += c>>i & 0xFF) += c>>(8^i) & 0xFF;
79             else {                             // encode a UTF16 surrogate pair
80                 c -= 0x10000;
81                 wchar_t hi = (c>>10)&0x3FF | 0xD800, lo = c&0x3FF | 0xDC00;
82                 (build += hi>>i & 0xFF) += hi>>(8^i) & 0xFF;
83                 (build += lo>>i & 0xFF) += lo>>(8^i) & 0xFF;
84             }
85         }
86         return build;
87     }
88 
89 }
90 
91