1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
3 
4 #include <boost/range/iterator_range_core.hpp>
5 
6 #include <cassert>
7 #include <utility>
8 
9 namespace pdalboost { namespace property_tree {
10     namespace json_parser { namespace detail
11 {
12 
13     struct external_wide_encoding
14     {
15         typedef wchar_t external_char;
16 
is_nlpdalboost::property_tree::json_parser::detail::external_wide_encoding17         bool is_nl(wchar_t c) const { return c == L'\n'; }
is_wspdalboost::property_tree::json_parser::detail::external_wide_encoding18         bool is_ws(wchar_t c) const {
19             return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
20         }
21 
is_minuspdalboost::property_tree::json_parser::detail::external_wide_encoding22         bool is_minus(wchar_t c) const { return c == L'-'; }
is_plusminuspdalboost::property_tree::json_parser::detail::external_wide_encoding23         bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; }
is_dotpdalboost::property_tree::json_parser::detail::external_wide_encoding24         bool is_dot(wchar_t c) const { return c == L'.'; }
is_eEpdalboost::property_tree::json_parser::detail::external_wide_encoding25         bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; }
is_0pdalboost::property_tree::json_parser::detail::external_wide_encoding26         bool is_0(wchar_t c) const { return c == L'0'; }
is_digitpdalboost::property_tree::json_parser::detail::external_wide_encoding27         bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; }
is_digit0pdalboost::property_tree::json_parser::detail::external_wide_encoding28         bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; }
29 
is_quotepdalboost::property_tree::json_parser::detail::external_wide_encoding30         bool is_quote(wchar_t c) const { return c == L'"'; }
is_backslashpdalboost::property_tree::json_parser::detail::external_wide_encoding31         bool is_backslash(wchar_t c) const { return c == L'\\'; }
is_slashpdalboost::property_tree::json_parser::detail::external_wide_encoding32         bool is_slash(wchar_t c) const { return c == L'/'; }
33 
is_commapdalboost::property_tree::json_parser::detail::external_wide_encoding34         bool is_comma(wchar_t c) const { return c == L','; }
is_open_bracketpdalboost::property_tree::json_parser::detail::external_wide_encoding35         bool is_open_bracket(wchar_t c) const { return c == L'['; }
is_close_bracketpdalboost::property_tree::json_parser::detail::external_wide_encoding36         bool is_close_bracket(wchar_t c) const { return c == L']'; }
is_colonpdalboost::property_tree::json_parser::detail::external_wide_encoding37         bool is_colon(wchar_t c) const { return c == L':'; }
is_open_bracepdalboost::property_tree::json_parser::detail::external_wide_encoding38         bool is_open_brace(wchar_t c) const { return c == L'{'; }
is_close_bracepdalboost::property_tree::json_parser::detail::external_wide_encoding39         bool is_close_brace(wchar_t c) const { return c == L'}'; }
40 
is_apdalboost::property_tree::json_parser::detail::external_wide_encoding41         bool is_a(wchar_t c) const { return c == L'a'; }
is_bpdalboost::property_tree::json_parser::detail::external_wide_encoding42         bool is_b(wchar_t c) const { return c == L'b'; }
is_epdalboost::property_tree::json_parser::detail::external_wide_encoding43         bool is_e(wchar_t c) const { return c == L'e'; }
is_fpdalboost::property_tree::json_parser::detail::external_wide_encoding44         bool is_f(wchar_t c) const { return c == L'f'; }
is_lpdalboost::property_tree::json_parser::detail::external_wide_encoding45         bool is_l(wchar_t c) const { return c == L'l'; }
is_npdalboost::property_tree::json_parser::detail::external_wide_encoding46         bool is_n(wchar_t c) const { return c == L'n'; }
is_rpdalboost::property_tree::json_parser::detail::external_wide_encoding47         bool is_r(wchar_t c) const { return c == L'r'; }
is_spdalboost::property_tree::json_parser::detail::external_wide_encoding48         bool is_s(wchar_t c) const { return c == L's'; }
is_tpdalboost::property_tree::json_parser::detail::external_wide_encoding49         bool is_t(wchar_t c) const { return c == L't'; }
is_updalboost::property_tree::json_parser::detail::external_wide_encoding50         bool is_u(wchar_t c) const { return c == L'u'; }
51 
decode_hexdigitpdalboost::property_tree::json_parser::detail::external_wide_encoding52         int decode_hexdigit(wchar_t c) {
53             if (c >= L'0' && c <= L'9') return c - L'0';
54             if (c >= L'A' && c <= L'F') return c - L'A' + 10;
55             if (c >= L'a' && c <= L'f') return c - L'a' + 10;
56             return -1;
57         }
58     };
59 
60     template <bool B> struct is_utf16 {};
61 
62     class wide_wide_encoding : public external_wide_encoding
63     {
64         typedef is_utf16<sizeof(wchar_t) == 2> test_utf16;
65     public:
66         typedef wchar_t internal_char;
67 
68         template <typename Iterator>
69         pdalboost::iterator_range<Iterator>
to_internal(Iterator first,Iterator last) const70         to_internal(Iterator first, Iterator last) const {
71             return pdalboost::make_iterator_range(first, last);
72         }
73 
to_internal_trivial(wchar_t c) const74         wchar_t to_internal_trivial(wchar_t c) const {
75             assert(!is_surrogate_high(c) && !is_surrogate_low(c));
76             return c;
77         }
78 
79         template <typename Iterator, typename Sentinel,
80                   typename EncodingErrorFn>
skip_codepoint(Iterator & cur,Sentinel end,EncodingErrorFn error_fn) const81         void skip_codepoint(Iterator& cur, Sentinel end,
82                             EncodingErrorFn error_fn) const {
83             transcode_codepoint(cur, end, DoNothing(), error_fn);
84         }
85 
86         template <typename Iterator, typename Sentinel, typename TranscodedFn,
87                   typename EncodingErrorFn>
transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn) const88         void transcode_codepoint(Iterator& cur, Sentinel end,
89                 TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
90             return transcode_codepoint(cur, end, transcoded_fn, error_fn,
91                                        test_utf16());
92         }
93 
94         template <typename TranscodedFn>
feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn) const95         void feed_codepoint(unsigned codepoint,
96                             TranscodedFn transcoded_fn) const {
97             feed_codepoint(codepoint, transcoded_fn, test_utf16());
98         }
99 
100     private:
101         struct DoNothing {
operator ()pdalboost::property_tree::json_parser::detail::wide_wide_encoding::DoNothing102             void operator ()(wchar_t) const {}
103         };
104 
105         template <typename Iterator, typename Sentinel, typename TranscodedFn,
106                   typename EncodingErrorFn>
transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn,is_utf16<false>) const107         void transcode_codepoint(Iterator& cur, Sentinel end,
108                                  TranscodedFn transcoded_fn,
109                                  EncodingErrorFn error_fn,
110                                  is_utf16<false>) const {
111             wchar_t c = *cur;
112             if (c < 0x20) {
113                 error_fn();
114             }
115             transcoded_fn(c);
116             ++cur;
117         }
118         template <typename Iterator, typename Sentinel, typename TranscodedFn,
119                   typename EncodingErrorFn>
transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn,is_utf16<true>) const120         void transcode_codepoint(Iterator& cur, Sentinel end,
121                                  TranscodedFn transcoded_fn,
122                                  EncodingErrorFn error_fn,
123                                  is_utf16<true>) const {
124             wchar_t c = *cur;
125             if (c < 0x20) {
126                 error_fn();
127             }
128             if (is_surrogate_low(c)) {
129                 error_fn();
130             }
131             transcoded_fn(c);
132             ++cur;
133             if (is_surrogate_high(c)) {
134                 c = *cur;
135                 if (!is_surrogate_low(c)) {
136                     error_fn();
137                 }
138                 transcoded_fn(c);
139                 ++cur;
140             }
141         }
142 
143         template <typename TranscodedFn>
feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn,is_utf16<false>) const144         void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
145                             is_utf16<false>) const {
146             transcoded_fn(static_cast<wchar_t>(codepoint));
147         }
148         template <typename TranscodedFn>
feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn,is_utf16<true>) const149         void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
150                             is_utf16<true>) const {
151             if (codepoint < 0x10000) {
152                 transcoded_fn(static_cast<wchar_t>(codepoint));
153             } else {
154                 codepoint -= 0x10000;
155                 transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800));
156                 transcoded_fn(static_cast<wchar_t>(
157                     (codepoint & 0x3ff) | 0xdc00));
158             }
159         }
160 
is_surrogate_high(unsigned codepoint)161         static bool is_surrogate_high(unsigned codepoint) {
162             return (codepoint & 0xfc00) == 0xd800;
163         }
is_surrogate_low(unsigned codepoint)164         static bool is_surrogate_low(unsigned codepoint) {
165             return (codepoint & 0xfc00) == 0xdc00;
166         }
167     };
168 
169 }}}}
170 
171 #endif
172